From b12583b6b1a3d184a5d498833475166b722a6175 Mon Sep 17 00:00:00 2001 From: finalchild Date: Wed, 29 Oct 2025 00:11:05 +0900 Subject: [PATCH] [UNDERTOW-2655] Fix text corruption in FileUtils.readFile when reading multi-byte characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The readFile method was reading the InputStream into a fixed-size byte buffer and decoding each chunk independently. This caused multi-byte UTF-8 character sequences to be split across buffer boundaries, resulting in text corruption with replacement characters. Replaced BufferedInputStream with InputStreamReader to handle buffering and character decoding together in a streaming fashion, ensuring multi-byte character sequences are never split. This issue became more significant after UNDERTOW-2337, as large form-data field values are now processed by this function. Originally reported in Spring Framework issue #35292. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../main/java/io/undertow/util/FileUtils.java | 16 +-- .../io/undertow/util/FileUtilsTestCase.java | 122 ++++++++++++++++++ 2 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 core/src/test/java/io/undertow/util/FileUtilsTestCase.java diff --git a/core/src/main/java/io/undertow/util/FileUtils.java b/core/src/main/java/io/undertow/util/FileUtils.java index f47d3448c3..9d4cde802d 100644 --- a/core/src/main/java/io/undertow/util/FileUtils.java +++ b/core/src/main/java/io/undertow/util/FileUtils.java @@ -18,9 +18,9 @@ package io.undertow.util; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.net.URL; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -73,14 +73,14 @@ public static String readFile(InputStream file) { * Reads the {@link InputStream file} and converting it to {@link String} using charSet encoding. 
*/ public static String readFile(InputStream file, Charset charSet) { - try (BufferedInputStream stream = new BufferedInputStream(file)) { - byte[] buff = new byte[1024]; - StringBuilder builder = new StringBuilder(); - int read; - while ((read = stream.read(buff)) != -1) { - builder.append(new String(buff, 0, read, charSet)); + try (InputStreamReader reader = new InputStreamReader(file, charSet)) { + StringBuilder result = new StringBuilder(); + char[] cbuf = new char[8192]; + int nread; + while ((nread = reader.read(cbuf, 0, cbuf.length)) != -1) { + result.append(cbuf, 0, nread); } - return builder.toString(); + return result.toString(); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/core/src/test/java/io/undertow/util/FileUtilsTestCase.java b/core/src/test/java/io/undertow/util/FileUtilsTestCase.java new file mode 100644 index 0000000000..46e5e1d8e8 --- /dev/null +++ b/core/src/test/java/io/undertow/util/FileUtilsTestCase.java @@ -0,0 +1,122 @@ +/* + * JBoss, Home of Professional Open Source. + * Copyright 2025 Red Hat, Inc., and individual contributors + * as indicated by the @author tags. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.undertow.util; + +import io.undertow.testutils.category.UnitTest; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + +/** + * @author Park Jaeon + */ +@Category(UnitTest.class) +public class FileUtilsTestCase { + + @Test + public void testMultiByteCharactersAtBufferBoundary() { + StringBuilder sb = new StringBuilder(); + + // Create content larger than 1024 bytes (the old buffer size) + // Fill with ASCII 'a' characters up to position 1023 + for (int i = 0; i < 1023; i++) { + sb.append('a'); + } + + // Add a 3-byte UTF-8 character (Chinese character) at position 1023-1025 + // This would span across the 1024-byte boundary in the old implementation + sb.append('δΈ–'); // 3-byte UTF-8 character + + // Add more content to ensure we're reading beyond the first buffer + for (int i = 0; i < 2000; i++) { + sb.append('b'); + } + + // Add some more multi-byte characters + sb.append(" Hello δΈ–η•Œ Testing πŸŽ‰"); + + String expected = sb.toString(); + InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8)); + + String result = FileUtils.readFile(stream); + + // The bug would cause replacement character (οΏ½) to appear instead of the correct character + Assert.assertFalse("Result should not contain replacement character (οΏ½)", + result.contains("\uFFFD")); + Assert.assertEquals("Content should be read correctly without corruption", + expected, result); + } + + @Test + public void testEmojisAtBufferBoundary() { + StringBuilder sb = new StringBuilder(); + + // Fill up to just before 1024 bytes + for (int i = 0; i < 1022; i++) { + sb.append('x'); + } + + // Add 4-byte emoji that would span the boundary + sb.append("πŸŽ‰"); // 4-byte UTF-8 character + + // Add more content + for (int i = 0; i < 500; i++) { + sb.append('y'); + } + + String expected = sb.toString(); + 
+ InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8)); + + String result = FileUtils.readFile(stream); + + Assert.assertFalse("Result should not contain replacement character", + result.contains("\uFFFD")); + Assert.assertEquals("Emoji should be preserved correctly", expected, result); + } + + @Test + public void testLargeContentWithMultiByteCharacters() { + StringBuilder sb = new StringBuilder(); + + // Create content that's definitely larger than 1024 bytes and includes + // various multi-byte characters throughout + String testPattern = "Hello 世界! Testing 🎉 multi-byte encoding. "; + + // Repeat pattern to create large content (each pattern is ~50 bytes) + for (int i = 0; i < 100; i++) { + sb.append(testPattern); + sb.append(i).append(" "); + } + + String expected = sb.toString(); + Assert.assertTrue("Content should be larger than 1024 bytes", + expected.getBytes(StandardCharsets.UTF_8).length > 1024); + + InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8)); + String result = FileUtils.readFile(stream); + + Assert.assertEquals("Large content with multi-byte characters should be read correctly", + expected, result); + Assert.assertFalse("No replacement characters should be present", + result.contains("\uFFFD")); + } +}