Merge pull request from GHSA-4f7p-27jc-3c36

digitalresistor · web-flow · commit 9e0b8c801e4d · 2022-03-16T15:26:15.000-06:00
Fix for HTTP request smuggling due to incorrect validation
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,3 +1,29 @@
+2.1.1
+-----
+
+Security Bugfix
+~~~~~~~~~~~~~~~
+
+- Waitress now validates that chunked encoding extensions are valid, and don't
+  contain invalid characters that are not allowed. They are still skipped/not
+  processed, but if they contain invalid data we no longer continue in and
+  return a 400 Bad Request. This stops potential HTTP desync/HTTP request
+  smuggling. Thanks to Zhang Zeyu for reporting this issue. See
+  https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
+
+- Waitress now validates that the chunk length is only valid hex digits when
+  parsing chunked encoding, and values such as ``0x01`` and ``+01`` are no
+  longer supported. This stops potential HTTP desync/HTTP request smuggling.
+  Thanks to Zhang Zeyu for reporting this issue. See
+  https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
+
+- Waitress now validates that the Content-Length sent by a remote contains only
+  digits in accordance with RFC7230 and will return a 400 Bad Request when the
+  Content-Length header contains invalid data, such as ``+10`` which would
+  previously get parsed as ``10`` and accepted. This stops potential HTTP
+  desync/HTTP request smuggling Thanks to Zhang Zeyu for reporting this issue. See
+  https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36
+
 2.1.0
 -----
 
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = waitress
-version = 2.1.0
+version = 2.1.1
 description = Waitress WSGI server
 long_description = file: README.rst, CHANGES.txt
 long_description_content_type = text/x-rst
diff --git a/src/waitress/parser.py b/src/waitress/parser.py
@@ -23,6 +23,7 @@
 
 from waitress.buffers import OverflowableBuffer
 from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
+from waitress.rfc7230 import HEADER_FIELD_RE, ONLY_DIGIT_RE
 from waitress.utilities import (
     BadRequest,
     RequestEntityTooLarge,
@@ -31,8 +32,6 @@
     find_double_newline,
 )
 
-from .rfc7230 import HEADER_FIELD
-
 
 def unquote_bytes_to_wsgi(bytestring):
     return unquote_to_bytes(bytestring).decode("latin-1")
@@ -221,7 +220,7 @@ def parse_header(self, header_plus):
         headers = self.headers
 
         for line in lines:
-            header = HEADER_FIELD.match(line)
+            header = HEADER_FIELD_RE.match(line)
 
             if not header:
                 raise ParsingError("Invalid header")
@@ -317,11 +316,12 @@ def parse_header(self, header_plus):
                 self.connection_close = True
 
         if not self.chunked:
-            try:
-                cl = int(headers.get("CONTENT_LENGTH", 0))
-            except ValueError:
+            cl = headers.get("CONTENT_LENGTH", "0")
+
+            if not ONLY_DIGIT_RE.match(cl.encode("latin-1")):
                 raise ParsingError("Content-Length is invalid")
 
+            cl = int(cl)
             self.content_length = cl
 
             if cl > 0:
diff --git a/src/waitress/receiver.py b/src/waitress/receiver.py
@@ -14,6 +14,7 @@
 """Data Chunk Receiver
 """
 
+from waitress.rfc7230 import CHUNK_EXT_RE, ONLY_HEXDIG_RE
 from waitress.utilities import BadRequest, find_double_newline
 
 
@@ -110,6 +111,7 @@ def received(self, s):
                     s = b""
                 else:
                     self.chunk_end = b""
+
                     if pos == 0:
                         # Chop off the terminating CR LF from the chunk
                         s = s[2:]
@@ -133,20 +135,32 @@ def received(self, s):
                     line = s[:pos]
                     s = s[pos + 2 :]
                     self.control_line = b""
-                    line = line.strip()
 
                     if line:
                         # Begin a new chunk.
                         semi = line.find(b";")
 
                         if semi >= 0:
-                            # discard extension info.
+                            extinfo = line[semi:]
+                            valid_ext_info = CHUNK_EXT_RE.match(extinfo)
+
+                            if not valid_ext_info:
+                                self.error = BadRequest("Invalid chunk extension")
+                                self.all_chunks_received = True
+
+                                break
+
                             line = line[:semi]
-                        try:
-                            sz = int(line.strip(), 16)  # hexadecimal
-                        except ValueError:  # garbage in input
-                            self.error = BadRequest("garbage in chunked encoding input")
-                            sz = 0
+
+                        if not ONLY_HEXDIG_RE.match(line):
+                            self.error = BadRequest("Invalid chunk size")
+                            self.all_chunks_received = True
+
+                            break
+
+                        # Can not fail due to matching against the regular
+                        # expression above
+                        sz = int(line, 16)  # hexadecimal
 
                         if sz > 0:
                             # Start a new chunk.
diff --git a/src/waitress/rfc7230.py b/src/waitress/rfc7230.py
@@ -5,6 +5,9 @@
 
 import re
 
+HEXDIG = "[0-9a-fA-F]"
+DIGIT = "[0-9]"
+
 WS = "[ \t]"
 OWS = WS + "{0,}?"
 RWS = WS + "{1,}?"
@@ -25,6 +28,12 @@
 #                  ; visible (printing) characters
 VCHAR = r"\x21-\x7e"
 
+# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
+QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"
+
+QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
+QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'
+
 # header-field   = field-name ":" OWS field-value OWS
 # field-name     = token
 # field-value    = *( field-content / obs-fold )
@@ -43,8 +52,24 @@
 # Which allows the field value here to just see if there is even a value in the first place
 FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
 
-HEADER_FIELD = re.compile(
+# chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+# chunk-ext-name = token
+# chunk-ext-val  = token / quoted-string
+
+CHUNK_EXT_NAME = TOKEN
+CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
+CHUNK_EXT = (
+    "(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
+)
+
+# Pre-compiled regular expressions for use elsewhere
+ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1"))
+ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1"))
+HEADER_FIELD_RE = re.compile(
     (
         "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
     ).encode("latin-1")
 )
+QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
+QUOTED_STRING_RE = re.compile(QUOTED_STRING)
+CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1"))
diff --git a/src/waitress/utilities.py b/src/waitress/utilities.py
@@ -22,7 +22,7 @@
 import stat
 import time
 
-from .rfc7230 import OBS_TEXT, VCHAR
+from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE
 
 logger = logging.getLogger("waitress")
 queue_logger = logging.getLogger("waitress.queue")
@@ -216,40 +216,18 @@ def parse_http_date(d):
     return retval
 
 
-# RFC 5234 Appendix B.1 "Core Rules":
-# VCHAR         =  %x21-7E
-#                  ; visible (printing) characters
-vchar_re = VCHAR
-
-# RFC 7230 Section 3.2.6 "Field Value Components":
-# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
-# qdtext        = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
-# obs-text      = %x80-FF
-# quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )
-obs_text_re = OBS_TEXT
-
-# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
-qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
-
-quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
-quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'
-
-quoted_string = re.compile(quoted_string_re)
-quoted_pair = re.compile(quoted_pair_re)
-
-
 def undquote(value):
     if value.startswith('"') and value.endswith('"'):
         # So it claims to be DQUOTE'ed, let's validate that
-        matches = quoted_string.match(value)
+        matches = QUOTED_STRING_RE.match(value)
 
         if matches and matches.end() == len(value):
             # Remove the DQUOTE's from the value
             value = value[1:-1]
 
             # Remove all backslashes that are followed by a valid vchar or
             # obs-text
-            value = quoted_pair.sub(r"\1", value)
+            value = QUOTED_PAIR_RE.sub(r"\1", value)
 
             return value
     elif not value.startswith('"') and not value.endswith('"'):
diff --git a/tests/test_functional.py b/tests/test_functional.py
@@ -322,7 +322,7 @@ def test_chunking_request_without_content(self):
             self.assertFalse("transfer-encoding" in headers)
 
     def test_chunking_request_with_content(self):
-        control_line = b"20;\r\n"  # 20 hex = 32 dec
+        control_line = b"20\r\n"  # 20 hex = 32 dec
         s = b"This string has 32 characters.\r\n"
         expected = s * 12
         header = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
@@ -341,7 +341,7 @@ def test_chunking_request_with_content(self):
             self.assertFalse("transfer-encoding" in headers)
 
     def test_broken_chunked_encoding(self):
-        control_line = b"20;\r\n"  # 20 hex = 32 dec
+        control_line = b"20\r\n"  # 20 hex = 32 dec
         s = b"This string has 32 characters.\r\n"
         to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
         to_send += control_line + s + b"\r\n"
@@ -364,8 +364,52 @@ def test_broken_chunked_encoding(self):
             self.send_check_error(to_send)
             self.assertRaises(ConnectionClosed, read_http, fp)
 
+    def test_broken_chunked_encoding_invalid_hex(self):
+        control_line = b"0x20\r\n"  # 20 hex = 32 dec
+        s = b"This string has 32 characters.\r\n"
+        to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
+        to_send += control_line + s + b"\r\n"
+        self.connect()
+        self.sock.send(to_send)
+        with self.sock.makefile("rb", 0) as fp:
+            line, headers, response_body = read_http(fp)
+            self.assertline(line, "400", "Bad Request", "HTTP/1.1")
+            cl = int(headers["content-length"])
+            self.assertEqual(cl, len(response_body))
+            self.assertIn(b"Invalid chunk size", response_body)
+            self.assertEqual(
+                sorted(headers.keys()),
+                ["connection", "content-length", "content-type", "date", "server"],
+            )
+            self.assertEqual(headers["content-type"], "text/plain")
+            # connection has been closed
+            self.send_check_error(to_send)
+            self.assertRaises(ConnectionClosed, read_http, fp)
+
+    def test_broken_chunked_encoding_invalid_extension(self):
+        control_line = b"20;invalid=\r\n"  # 20 hex = 32 dec
+        s = b"This string has 32 characters.\r\n"
+        to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
+        to_send += control_line + s + b"\r\n"
+        self.connect()
+        self.sock.send(to_send)
+        with self.sock.makefile("rb", 0) as fp:
+            line, headers, response_body = read_http(fp)
+            self.assertline(line, "400", "Bad Request", "HTTP/1.1")
+            cl = int(headers["content-length"])
+            self.assertEqual(cl, len(response_body))
+            self.assertIn(b"Invalid chunk extension", response_body)
+            self.assertEqual(
+                sorted(headers.keys()),
+                ["connection", "content-length", "content-type", "date", "server"],
+            )
+            self.assertEqual(headers["content-type"], "text/plain")
+            # connection has been closed
+            self.send_check_error(to_send)
+            self.assertRaises(ConnectionClosed, read_http, fp)
+
     def test_broken_chunked_encoding_missing_chunk_end(self):
-        control_line = b"20;\r\n"  # 20 hex = 32 dec
+        control_line = b"20\r\n"  # 20 hex = 32 dec
         s = b"This string has 32 characters.\r\n"
         to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
         to_send += control_line + s
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -155,7 +155,7 @@ def test_received_chunked_completed_sets_content_length(self):
             b"Transfer-Encoding: chunked\r\n"
             b"X-Foo: 1\r\n"
             b"\r\n"
-            b"1d;\r\n"
+            b"1d\r\n"
             b"This string has 29 characters\r\n"
             b"0\r\n\r\n"
         )
@@ -193,6 +193,26 @@ def test_parse_header_bad_content_length(self):
         else:  # pragma: nocover
             self.assertTrue(False)
 
+    def test_parse_header_bad_content_length_plus(self):
+        data = b"GET /foobar HTTP/8.4\r\ncontent-length: +10\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Content-Length is invalid", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_bad_content_length_minus(self):
+        data = b"GET /foobar HTTP/8.4\r\ncontent-length: -10\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except ParsingError as e:
+            self.assertIn("Content-Length is invalid", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
     def test_parse_header_multiple_content_length(self):
         data = b"GET /foobar HTTP/8.4\r\ncontent-length: 10\r\ncontent-length: 20\r\n"
 
diff --git a/tests/test_receiver.py b/tests/test_receiver.py