Issue #4033 Percent Encoded Bad Requests (#4272)

* Modernizing testcase Signed-off-by: Joakim Erdfelt <[email protected]> * Issue #4033 Percent Encoded Bad Requests Added test to demonstrate bad percent encoded request Signed-off-by: Greg Wilkins <[email protected]> * Issue #4033 - adding sanity test for percent paths and checkAlias() Signed-off-by: Joakim Erdfelt <[email protected]> * Eliminating 9.3.0.RC0 dependency Signed-off-by: Joakim Erdfelt <[email protected]> * Issue #4033 - More tests for Resource checkAlias() behavior Signed-off-by: Joakim Erdfelt <[email protected]> * Issue #4033 - Splitting badDecodePath Signed-off-by: Joakim Erdfelt <[email protected]> * Issue #4033 - More badDecodePath tests Signed-off-by: Joakim Erdfelt <[email protected]> * Issue #4033 Percent Encoded Bad Requests reverted decodePathBehaviour Signed-off-by: Greg Wilkins <[email protected]> * testing pull request building * Issue #4033 updates after review Signed-off-by: Greg Wilkins <[email protected]>
jetty · Nov 11, 2019 · ee0f9fc · ee0f9fc
1 parent c336616
commit ee0f9fc
Show file tree

Hide file tree

Showing 5 changed files with 217 additions and 131 deletions.
diff --git a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
@@ -824,12 +824,11 @@ public void testEmptyHost() throws Exception
     @Test
     public void testBadURIencoding() throws Exception
     {
-        // The URI is being leniently decoded, leaving the "%x" alone
         String response = connector.getResponse("GET /bad/encoding%x HTTP/1.1\r\n" +
             "Host: localhost\r\n" +
             "Connection: close\r\n" +
             "\r\n");
-        checkContains(response, 0, "HTTP/1.1 200");
+        checkContains(response, 0, "HTTP/1.1 400");
     }
 
     @Test

diff --git a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpServerTestBase.java b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpServerTestBase.java
@@ -251,6 +251,24 @@ public void testFullURI() throws Exception
         }
     }
 
+    @Test
+    public void testBadURI() throws Exception
+    {
+        configureServer(new HelloWorldHandler());
+
+        try (Socket client = newSocket(_serverURI.getHost(), _serverURI.getPort()))
+        {
+            OutputStream os = client.getOutputStream();
+
+            os.write("GET /%xx HTTP/1.0\r\n\r\n".getBytes(StandardCharsets.ISO_8859_1));
+            os.flush();
+
+            // Read the response.
+            String response = readResponse(client);
+
+            assertThat(response, Matchers.containsString("HTTP/1.1 400 "));
+        }
+    }
     @Test
     public void testExceptionThrownInHandlerLoop() throws Exception
     {

diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
@@ -470,70 +470,25 @@ public static String decodePath(String path, int offset, int length)
                             builder = new Utf8StringBuilder(path.length());
                             builder.append(path, offset, i - offset);
                         }
-
-                        // lenient percent decoding
-                        if (i >= end)
-                        {
-                            // [LENIENT] a percent sign at end of string.
-                            builder.append('%');
-                            i = end;
-                        }
-                        else if (end > (i + 1))
+                        if ((i + 2) < end)
                         {
-                            char type = path.charAt(i + 1);
-                            if (type == 'u')
-                            {
-                                // We have a possible (deprecated) microsoft unicode code point "%u####"
-                                // - not recommended to use as it's limited to 2 bytes.
-                                if ((i + 5) >= end)
-                                {
-                                    // [LENIENT] we have a partial "%u####" at the end of a string.
-                                    builder.append(path, i, (end - i));
-                                    i = end;
-                                }
-                                else
-                                {
-                                    // this seems wrong, as we are casting to a char, but that's the known
-                                    // limitation of this deprecated encoding (only 2 bytes allowed)
-                                    if (StringUtil.isHex(path, i + 2, 4))
-                                    {
-                                        builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
-                                        i += 5;
-                                    }
-                                    else
-                                    {
-                                        // [LENIENT] copy the "%u" as-is.
-                                        builder.append(path, i, 2);
-                                        i += 1;
-                                    }
-                                }
-                            }
-                            else if (end > (i + 2))
+                            char u = path.charAt(i + 1);
+                            if (u == 'u')
                             {
-                                // we have a possible "%##" encoding
-                                if (StringUtil.isHex(path, i + 1, 2))
-                                {
-                                    builder.append((byte)TypeUtil.parseInt(path, i + 1, 2, 16));
-                                    i += 2;
-                                }
-                                else
-                                {
-                                    builder.append(path, i, 3);
-                                    i += 2;
-                                }
+                                // TODO remove %u support in jetty-10
+                                // this is wrong. This is a codepoint not a char
+                                builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
+                                i += 5;
                             }
                             else
                             {
-                                // [LENIENT] incomplete "%##" sequence at end of string
-                                builder.append(path, i, (end - i));
-                                i = end;
+                                builder.append((byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2)))));
+                                i += 2;
                             }
                         }
                         else
                         {
-                            // [LENIENT] the "%" at the end of the string
-                            builder.append(path, i, (end - i));
-                            i = end;
+                            throw new IllegalArgumentException("Bad URI % encoding");
                         }
 
                         break;
@@ -571,10 +526,18 @@ else if (end > (i + 2))
         }
         catch (NotUtf8Exception e)
         {
-            LOG.warn(path.substring(offset, offset + length) + " " + e);
-            LOG.debug(e);
+            LOG.debug(path.substring(offset, offset + length) + " " + e);
             return decodeISO88591Path(path, offset, length);
         }
+        catch (IllegalArgumentException e)
+        {
+            throw e;
+        }
+        catch (Exception e)
+        {
+            throw new IllegalArgumentException("cannot decode URI", e);
+        }
+
     }
 
     /* Decode a URI path and strip parameters of ISO-8859-1 path
@@ -599,13 +562,14 @@ private static String decodeISO88591Path(String path, int offset, int length)
                         char u = path.charAt(i + 1);
                         if (u == 'u')
                         {
-                            // TODO this is wrong. This is a codepoint not a char
+                            // TODO remove %u encoding support in jetty-10
+                            // This is wrong. This is a codepoint not a char
                             builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
                             i += 5;
                         }
                         else
                         {
-                            builder.append((byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2)))));
+                            builder.append((char)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2)))));
                             i += 2;
                         }
                     }

diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
@@ -33,6 +33,7 @@
 import static org.hamcrest.Matchers.is;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -94,18 +95,6 @@ public static Stream<Arguments> decodePathSource()
 
         // Deprecated Microsoft Percent-U encoding
         arguments.add(Arguments.of("abc%u3040", "abc\u3040"));
-
-        // Lenient decode
-        arguments.add(Arguments.of("abc%xyz", "abc%xyz")); // not a "%##"
-        arguments.add(Arguments.of("abc%", "abc%")); // percent at end of string
-        arguments.add(Arguments.of("abc%A", "abc%A")); // incomplete "%##" at end of string
-        arguments.add(Arguments.of("abc%uvwxyz", "abc%uvwxyz")); // not a valid "%u####"
-        arguments.add(Arguments.of("abc%uEFGHIJ", "abc%uEFGHIJ")); // not a valid "%u####"
-        arguments.add(Arguments.of("abc%uABC", "abc%uABC")); // incomplete "%u####"
-        arguments.add(Arguments.of("abc%uAB", "abc%uAB")); // incomplete "%u####"
-        arguments.add(Arguments.of("abc%uA", "abc%uA")); // incomplete "%u####"
-        arguments.add(Arguments.of("abc%u", "abc%u")); // incomplete "%u####"
-
         return arguments.stream();
     }
 
@@ -117,6 +106,60 @@ public void testDecodePath(String encodedPath, String expectedPath)
         assertEquals(expectedPath, path);
     }
 
+    public static Stream<Arguments> decodeBadPathSource()
+    {
+        List<Arguments> arguments = new ArrayList<>();
+
+        // Test for null character (real world ugly test case)
+        // TODO is this a bad decoding or a bad URI ?
+        // arguments.add(Arguments.of("/%00/"));
+
+        // Deprecated Microsoft Percent-U encoding
+        // TODO still supported for now ?
+        // arguments.add(Arguments.of("abc%u3040"));
+
+        // Bad %## encoding
+        arguments.add(Arguments.of("abc%xyz"));
+
+        // Incomplete %## encoding
+        arguments.add(Arguments.of("abc%"));
+        arguments.add(Arguments.of("abc%A"));
+
+        // Invalid microsoft %u#### encoding
+        arguments.add(Arguments.of("abc%uvwxyz"));
+        arguments.add(Arguments.of("abc%uEFGHIJ"));
+
+        // Incomplete microsoft %u#### encoding
+        arguments.add(Arguments.of("abc%uABC"));
+        arguments.add(Arguments.of("abc%uAB"));
+        arguments.add(Arguments.of("abc%uA"));
+        arguments.add(Arguments.of("abc%u"));
+
+        // Invalid UTF-8 and ISO8859-1
+        // TODO currently ISO8859 is too forgiving to detect these
+        /*
+        arguments.add(Arguments.of("abc%C3%28")); // invalid 2 octext sequence
+        arguments.add(Arguments.of("abc%A0%A1")); // invalid 2 octext sequence
+        arguments.add(Arguments.of("abc%e2%28%a1")); // invalid 3 octext sequence
+        arguments.add(Arguments.of("abc%e2%82%28")); // invalid 3 octext sequence
+        arguments.add(Arguments.of("abc%f0%28%8c%bc")); // invalid 4 octext sequence
+        arguments.add(Arguments.of("abc%f0%90%28%bc")); // invalid 4 octext sequence
+        arguments.add(Arguments.of("abc%f0%28%8c%28")); // invalid 4 octext sequence
+        arguments.add(Arguments.of("abc%f8%a1%a1%a1%a1")); // valid sequence, but not unicode
+        arguments.add(Arguments.of("abc%fc%a1%a1%a1%a1%a1")); // valid sequence, but not unicode
+        arguments.add(Arguments.of("abc%f8%a1%a1%a1")); // incomplete sequence
+         */
+
+        return arguments.stream();
+    }
+
+    @ParameterizedTest(name = "[{index}] {0}")
+    @MethodSource("decodeBadPathSource")
+    public void testBadDecodePath(String encodedPath)
+    {
+        assertThrows(IllegalArgumentException.class, () -> URIUtil.decodePath(encodedPath));
+    }
+
     @Test
     public void testDecodePathSubstring()
     {