Skip to content

Commit 9e0b8c8

Browse files
Merge pull request from GHSA-4f7p-27jc-3c36
Fix for HTTP request smuggling due to incorrect validation
2 parents 22c0394 + b28c9e8 commit 9e0b8c8

File tree

9 files changed

+202
-44
lines changed

9 files changed

+202
-44
lines changed

Diff for: CHANGES.txt

+26
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
2.1.1
2+
-----
3+
4+
Security Bugfix
5+
~~~~~~~~~~~~~~~
6+
7+
- Waitress now validates that chunked encoding extensions are valid, and don't
8+
contain characters that are not allowed. They are still skipped/not
9+
processed, but if they contain invalid data we no longer continue and instead
10+
return a 400 Bad Request. This stops potential HTTP desync/HTTP request
11+
smuggling. Thanks to Zhang Zeyu for reporting this issue. See
12+
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
13+
14+
- Waitress now validates that the chunk length is only valid hex digits when
15+
parsing chunked encoding, and values such as ``0x01`` and ``+01`` are no
16+
longer supported. This stops potential HTTP desync/HTTP request smuggling.
17+
Thanks to Zhang Zeyu for reporting this issue. See
18+
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
19+
20+
- Waitress now validates that the Content-Length sent by a remote contains only
21+
digits in accordance with RFC7230 and will return a 400 Bad Request when the
22+
Content-Length header contains invalid data, such as ``+10`` which would
23+
previously get parsed as ``10`` and accepted. This stops potential HTTP
24+
desync/HTTP request smuggling. Thanks to Zhang Zeyu for reporting this issue. See
25+
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
26+
127
2.1.0
228
-----
329

Diff for: setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = waitress
3-
version = 2.1.0
3+
version = 2.1.1
44
description = Waitress WSGI server
55
long_description = file: README.rst, CHANGES.txt
66
long_description_content_type = text/x-rst

Diff for: src/waitress/parser.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from waitress.buffers import OverflowableBuffer
2525
from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
26+
from waitress.rfc7230 import HEADER_FIELD_RE, ONLY_DIGIT_RE
2627
from waitress.utilities import (
2728
BadRequest,
2829
RequestEntityTooLarge,
@@ -31,8 +32,6 @@
3132
find_double_newline,
3233
)
3334

34-
from .rfc7230 import HEADER_FIELD
35-
3635

3736
def unquote_bytes_to_wsgi(bytestring):
3837
return unquote_to_bytes(bytestring).decode("latin-1")
@@ -221,7 +220,7 @@ def parse_header(self, header_plus):
221220
headers = self.headers
222221

223222
for line in lines:
224-
header = HEADER_FIELD.match(line)
223+
header = HEADER_FIELD_RE.match(line)
225224

226225
if not header:
227226
raise ParsingError("Invalid header")
@@ -317,11 +316,12 @@ def parse_header(self, header_plus):
317316
self.connection_close = True
318317

319318
if not self.chunked:
320-
try:
321-
cl = int(headers.get("CONTENT_LENGTH", 0))
322-
except ValueError:
319+
cl = headers.get("CONTENT_LENGTH", "0")
320+
321+
if not ONLY_DIGIT_RE.match(cl.encode("latin-1")):
323322
raise ParsingError("Content-Length is invalid")
324323

324+
cl = int(cl)
325325
self.content_length = cl
326326

327327
if cl > 0:

Diff for: src/waitress/receiver.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Data Chunk Receiver
1515
"""
1616

17+
from waitress.rfc7230 import CHUNK_EXT_RE, ONLY_HEXDIG_RE
1718
from waitress.utilities import BadRequest, find_double_newline
1819

1920

@@ -110,6 +111,7 @@ def received(self, s):
110111
s = b""
111112
else:
112113
self.chunk_end = b""
114+
113115
if pos == 0:
114116
# Chop off the terminating CR LF from the chunk
115117
s = s[2:]
@@ -133,20 +135,32 @@ def received(self, s):
133135
line = s[:pos]
134136
s = s[pos + 2 :]
135137
self.control_line = b""
136-
line = line.strip()
137138

138139
if line:
139140
# Begin a new chunk.
140141
semi = line.find(b";")
141142

142143
if semi >= 0:
143-
# discard extension info.
144+
extinfo = line[semi:]
145+
valid_ext_info = CHUNK_EXT_RE.match(extinfo)
146+
147+
if not valid_ext_info:
148+
self.error = BadRequest("Invalid chunk extension")
149+
self.all_chunks_received = True
150+
151+
break
152+
144153
line = line[:semi]
145-
try:
146-
sz = int(line.strip(), 16) # hexadecimal
147-
except ValueError: # garbage in input
148-
self.error = BadRequest("garbage in chunked encoding input")
149-
sz = 0
154+
155+
if not ONLY_HEXDIG_RE.match(line):
156+
self.error = BadRequest("Invalid chunk size")
157+
self.all_chunks_received = True
158+
159+
break
160+
161+
# Can not fail due to matching against the regular
162+
# expression above
163+
sz = int(line, 16) # hexadecimal
150164

151165
if sz > 0:
152166
# Start a new chunk.

Diff for: src/waitress/rfc7230.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
import re
77

8+
HEXDIG = "[0-9a-fA-F]"
9+
DIGIT = "[0-9]"
10+
811
WS = "[ \t]"
912
OWS = WS + "{0,}?"
1013
RWS = WS + "{1,}?"
@@ -25,6 +28,12 @@
2528
# ; visible (printing) characters
2629
VCHAR = r"\x21-\x7e"
2730

31+
# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
32+
QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"
33+
34+
QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
35+
QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'
36+
2837
# header-field = field-name ":" OWS field-value OWS
2938
# field-name = token
3039
# field-value = *( field-content / obs-fold )
@@ -43,8 +52,24 @@
4352
# Which allows the field value here to just see if there is even a value in the first place
4453
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
4554

46-
HEADER_FIELD = re.compile(
55+
# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
56+
# chunk-ext-name = token
57+
# chunk-ext-val = token / quoted-string
58+
59+
CHUNK_EXT_NAME = TOKEN
60+
CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
61+
CHUNK_EXT = (
62+
"(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
63+
)
64+
65+
# Pre-compiled regular expressions for use elsewhere
66+
ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1"))
67+
ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1"))
68+
HEADER_FIELD_RE = re.compile(
4769
(
4870
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
4971
).encode("latin-1")
5072
)
73+
QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
74+
QUOTED_STRING_RE = re.compile(QUOTED_STRING)
75+
CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1"))

Diff for: src/waitress/utilities.py

+3-25
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import stat
2323
import time
2424

25-
from .rfc7230 import OBS_TEXT, VCHAR
25+
from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE
2626

2727
logger = logging.getLogger("waitress")
2828
queue_logger = logging.getLogger("waitress.queue")
@@ -216,40 +216,18 @@ def parse_http_date(d):
216216
return retval
217217

218218

219-
# RFC 5234 Appendix B.1 "Core Rules":
220-
# VCHAR = %x21-7E
221-
# ; visible (printing) characters
222-
vchar_re = VCHAR
223-
224-
# RFC 7230 Section 3.2.6 "Field Value Components":
225-
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
226-
# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
227-
# obs-text = %x80-FF
228-
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
229-
obs_text_re = OBS_TEXT
230-
231-
# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
232-
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
233-
234-
quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
235-
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'
236-
237-
quoted_string = re.compile(quoted_string_re)
238-
quoted_pair = re.compile(quoted_pair_re)
239-
240-
241219
def undquote(value):
242220
if value.startswith('"') and value.endswith('"'):
243221
# So it claims to be DQUOTE'ed, let's validate that
244-
matches = quoted_string.match(value)
222+
matches = QUOTED_STRING_RE.match(value)
245223

246224
if matches and matches.end() == len(value):
247225
# Remove the DQUOTE's from the value
248226
value = value[1:-1]
249227

250228
# Remove all backslashes that are followed by a valid vchar or
251229
# obs-text
252-
value = quoted_pair.sub(r"\1", value)
230+
value = QUOTED_PAIR_RE.sub(r"\1", value)
253231

254232
return value
255233
elif not value.startswith('"') and not value.endswith('"'):

Diff for: tests/test_functional.py

+47-3
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def test_chunking_request_without_content(self):
322322
self.assertFalse("transfer-encoding" in headers)
323323

324324
def test_chunking_request_with_content(self):
325-
control_line = b"20;\r\n" # 20 hex = 32 dec
325+
control_line = b"20\r\n" # 20 hex = 32 dec
326326
s = b"This string has 32 characters.\r\n"
327327
expected = s * 12
328328
header = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
@@ -341,7 +341,7 @@ def test_chunking_request_with_content(self):
341341
self.assertFalse("transfer-encoding" in headers)
342342

343343
def test_broken_chunked_encoding(self):
344-
control_line = b"20;\r\n" # 20 hex = 32 dec
344+
control_line = b"20\r\n" # 20 hex = 32 dec
345345
s = b"This string has 32 characters.\r\n"
346346
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
347347
to_send += control_line + s + b"\r\n"
@@ -364,8 +364,52 @@ def test_broken_chunked_encoding(self):
364364
self.send_check_error(to_send)
365365
self.assertRaises(ConnectionClosed, read_http, fp)
366366

367+
def test_broken_chunked_encoding_invalid_hex(self):
368+
control_line = b"0x20\r\n" # 20 hex = 32 dec
369+
s = b"This string has 32 characters.\r\n"
370+
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
371+
to_send += control_line + s + b"\r\n"
372+
self.connect()
373+
self.sock.send(to_send)
374+
with self.sock.makefile("rb", 0) as fp:
375+
line, headers, response_body = read_http(fp)
376+
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
377+
cl = int(headers["content-length"])
378+
self.assertEqual(cl, len(response_body))
379+
self.assertIn(b"Invalid chunk size", response_body)
380+
self.assertEqual(
381+
sorted(headers.keys()),
382+
["connection", "content-length", "content-type", "date", "server"],
383+
)
384+
self.assertEqual(headers["content-type"], "text/plain")
385+
# connection has been closed
386+
self.send_check_error(to_send)
387+
self.assertRaises(ConnectionClosed, read_http, fp)
388+
389+
def test_broken_chunked_encoding_invalid_extension(self):
390+
control_line = b"20;invalid=\r\n" # 20 hex = 32 dec
391+
s = b"This string has 32 characters.\r\n"
392+
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
393+
to_send += control_line + s + b"\r\n"
394+
self.connect()
395+
self.sock.send(to_send)
396+
with self.sock.makefile("rb", 0) as fp:
397+
line, headers, response_body = read_http(fp)
398+
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
399+
cl = int(headers["content-length"])
400+
self.assertEqual(cl, len(response_body))
401+
self.assertIn(b"Invalid chunk extension", response_body)
402+
self.assertEqual(
403+
sorted(headers.keys()),
404+
["connection", "content-length", "content-type", "date", "server"],
405+
)
406+
self.assertEqual(headers["content-type"], "text/plain")
407+
# connection has been closed
408+
self.send_check_error(to_send)
409+
self.assertRaises(ConnectionClosed, read_http, fp)
410+
367411
def test_broken_chunked_encoding_missing_chunk_end(self):
368-
control_line = b"20;\r\n" # 20 hex = 32 dec
412+
control_line = b"20\r\n" # 20 hex = 32 dec
369413
s = b"This string has 32 characters.\r\n"
370414
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
371415
to_send += control_line + s

Diff for: tests/test_parser.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def test_received_chunked_completed_sets_content_length(self):
155155
b"Transfer-Encoding: chunked\r\n"
156156
b"X-Foo: 1\r\n"
157157
b"\r\n"
158-
b"1d;\r\n"
158+
b"1d\r\n"
159159
b"This string has 29 characters\r\n"
160160
b"0\r\n\r\n"
161161
)
@@ -193,6 +193,26 @@ def test_parse_header_bad_content_length(self):
193193
else: # pragma: nocover
194194
self.assertTrue(False)
195195

196+
def test_parse_header_bad_content_length_plus(self):
197+
data = b"GET /foobar HTTP/8.4\r\ncontent-length: +10\r\n"
198+
199+
try:
200+
self.parser.parse_header(data)
201+
except ParsingError as e:
202+
self.assertIn("Content-Length is invalid", e.args[0])
203+
else: # pragma: nocover
204+
self.assertTrue(False)
205+
206+
def test_parse_header_bad_content_length_minus(self):
207+
data = b"GET /foobar HTTP/8.4\r\ncontent-length: -10\r\n"
208+
209+
try:
210+
self.parser.parse_header(data)
211+
except ParsingError as e:
212+
self.assertIn("Content-Length is invalid", e.args[0])
213+
else: # pragma: nocover
214+
self.assertTrue(False)
215+
196216
def test_parse_header_multiple_content_length(self):
197217
data = b"GET /foobar HTTP/8.4\r\ncontent-length: 10\r\ncontent-length: 20\r\n"
198218

0 commit comments

Comments
 (0)