Skip to content

Commit 28f3eeb

Browse files
finalchild and claude committed
[UNDERTOW-2655] Fix text corruption in FileUtils.readFile when reading multi-byte characters
The readFile method read the InputStream into a fixed-size byte buffer and decoded each chunk independently. This caused multi-byte UTF-8 character sequences to be split across buffer boundaries, corrupting the text with replacement characters.

Replaced BufferedInputStream with InputStreamReader, which handles buffering and character decoding together in a streaming fashion, ensuring multi-byte character sequences are never split.

This issue became more significant after UNDERTOW-2337, as large form-data field values are now processed by this function. Originally reported in Spring Framework issue #35292.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 47e1b06 commit 28f3eeb

File tree

2 files changed

+130
-8
lines changed

2 files changed

+130
-8
lines changed

core/src/main/java/io/undertow/util/FileUtils.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919
package io.undertow.util;
2020

21-
import java.io.BufferedInputStream;
2221
import java.io.IOException;
2322
import java.io.InputStream;
23+
import java.io.InputStreamReader;
2424
import java.net.URL;
2525
import java.nio.charset.Charset;
2626
import java.nio.charset.StandardCharsets;
@@ -73,14 +73,14 @@ public static String readFile(InputStream file) {
7373
* Reads the {@link InputStream file} and converting it to {@link String} using <code>charSet</code> encoding.
7474
*/
7575
public static String readFile(InputStream file, Charset charSet) {
76-
try (BufferedInputStream stream = new BufferedInputStream(file)) {
77-
byte[] buff = new byte[1024];
78-
StringBuilder builder = new StringBuilder();
79-
int read;
80-
while ((read = stream.read(buff)) != -1) {
81-
builder.append(new String(buff, 0, read, charSet));
76+
try (InputStreamReader reader = new InputStreamReader(file, charSet)) {
77+
StringBuilder result = new StringBuilder();
78+
char[] cbuf = new char[8192];
79+
int nread;
80+
while ((nread = reader.read(cbuf, 0, cbuf.length)) != -1) {
81+
result.append(cbuf, 0, nread);
8282
}
83-
return builder.toString();
83+
return result.toString();
8484
} catch (IOException e) {
8585
throw new RuntimeException(e);
8686
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* JBoss, Home of Professional Open Source.
3+
* Copyright 2025 Red Hat, Inc., and individual contributors
4+
* as indicated by the @author tags.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package io.undertow.util;
19+
20+
import io.undertow.testutils.category.UnitTest;
21+
import org.junit.Assert;
22+
import org.junit.Test;
23+
import org.junit.experimental.categories.Category;
24+
25+
import java.io.ByteArrayInputStream;
26+
import java.io.InputStream;
27+
import java.nio.charset.StandardCharsets;
28+
29+
/**
* Regression tests for {@link FileUtils#readFile(InputStream)} with multi-byte UTF-8 input.
* Each test builds a string, encodes it as UTF-8 into a {@link ByteArrayInputStream},
* reads it back through readFile, and expects the identical string with no U+FFFD
* replacement characters.
*
* NOTE(review): the one-argument readFile overload is assumed to decode as UTF-8;
* its body is not visible in this diff - confirm.
* NOTE(review): every input built here is shorter than 8192 chars, the size of the char
* buffer in the rewritten readFile, so these tests pin the old 1024-byte chunk boundary only.
*
30+
* @author Park Jaeon
31+
*/
32+
@Category(UnitTest.class)
33+
public class FileUtilsTestCase {
34+
35+
/**
* Places the 3-byte character '世' at UTF-8 byte offsets 1023-1025 (0-based), so it
* straddles the 1024-byte chunk boundary of the previous implementation, and checks
* that readFile returns the input unchanged.
*/
@Test
36+
public void testMultiByteCharactersAtBufferBoundary() {
37+
StringBuilder sb = new StringBuilder();
38+
39+
// Create content larger than 1024 bytes (the old buffer size)
40+
// Fill byte offsets 0-1022 with ASCII 'a' (1023 single-byte characters)
41+
for (int i = 0; i < 1023; i++) {
42+
sb.append('a');
43+
}
44+
45+
// Add a 3-byte UTF-8 character (Chinese character) at byte offsets 1023-1025 (0-based)
46+
// This would span across the 1024-byte boundary in the old implementation
47+
sb.append('世'); // 3-byte UTF-8 character
48+
49+
// Add more content to ensure we're reading beyond the first buffer
50+
for (int i = 0; i < 2000; i++) {
51+
sb.append('b');
52+
}
53+
54+
// Add some more multi-byte characters
55+
sb.append(" Hello 世界 Testing 🎉");
56+
57+
String expected = sb.toString();
58+
InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8));
59+
60+
String result = FileUtils.readFile(stream);
61+
62+
// The bug would cause replacement character (�) to appear instead of the correct character
63+
Assert.assertFalse("Result should not contain replacement character (�)",
64+
result.contains("\uFFFD"));
65+
Assert.assertEquals("Content should be read correctly without corruption",
66+
expected, result);
67+
}
68+
69+
/**
* Places a 4-byte emoji at UTF-8 byte offsets 1022-1025 (0-based), two bytes on each
* side of the old 1024-byte chunk boundary, and checks that readFile returns the
* input unchanged.
*/
@Test
70+
public void testEmojisAtBufferBoundary() {
71+
StringBuilder sb = new StringBuilder();
72+
73+
// Fill byte offsets 0-1021 so the next character starts two bytes before the 1024-byte boundary
74+
for (int i = 0; i < 1022; i++) {
75+
sb.append('x');
76+
}
77+
78+
// Add 4-byte emoji that would span the boundary (byte offsets 1022-1025)
79+
sb.append("🎉"); // 4-byte UTF-8 character
80+
81+
// Add more content
82+
for (int i = 0; i < 500; i++) {
83+
sb.append('y');
84+
}
85+
86+
String expected = sb.toString();
87+
InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8));
88+
89+
String result = FileUtils.readFile(stream);
90+
91+
Assert.assertFalse("Result should not contain replacement character",
92+
result.contains("\uFFFD"));
93+
Assert.assertEquals("Emoji should be preserved correctly", expected, result);
94+
}
95+
96+
/**
* Repeats a pattern mixing ASCII, 3-byte and 4-byte characters 100 times, each repetition
* followed by the loop index and a space, asserts that the UTF-8 encoding exceeds
* 1024 bytes, and checks that readFile returns the input unchanged.
*/
@Test
97+
public void testLargeContentWithMultiByteCharacters() {
98+
StringBuilder sb = new StringBuilder();
99+
100+
// Create content that's definitely larger than 1024 bytes and includes
101+
// various multi-byte characters throughout
102+
String testPattern = "Hello 世界! Testing 🎉 multi-byte encoding. ";
103+
104+
// Repeat pattern to create large content (each pattern is ~50 bytes)
105+
for (int i = 0; i < 100; i++) {
106+
sb.append(testPattern);
107+
sb.append(i).append(" ");
108+
}
109+
110+
String expected = sb.toString();
111+
// Guard: the input must be long enough to cross the old 1024-byte chunk size
Assert.assertTrue("Content should be larger than 1024 bytes",
112+
expected.getBytes(StandardCharsets.UTF_8).length > 1024);
113+
114+
InputStream stream = new ByteArrayInputStream(expected.getBytes(StandardCharsets.UTF_8));
115+
String result = FileUtils.readFile(stream);
116+
117+
Assert.assertEquals("Large content with multi-byte characters should be read correctly",
118+
expected, result);
119+
Assert.assertFalse("No replacement characters should be present",
120+
result.contains("\uFFFD"));
121+
}
122+
}

0 commit comments

Comments
 (0)