Skip to content

Commit d3fec85

Browse files
committed
Fix #125 (UTF-32 decode, buffer boundary)
1 parent ca13e4f commit d3fec85

File tree

5 files changed

+105
-47
lines changed

5 files changed

+105
-47
lines changed

release-notes/VERSION

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Project: woodstox
44
=== Releases ===
55
------------------------------------------------------------------------
66

7+
6.2.6 (not yet released)
8+
9+
#125: `ArrayIndexOutOfBoundsException` for UTF-32 encoded data
10+
711
6.2.5 (06-Apr-2021)
812

913
#123: NPE for content that only has XML declaration and unknown encoding

src/main/java/com/ctc/wstx/io/BaseReader.java

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ protected BaseReader(ReaderConfig cfg, InputStream in, byte[] buf, int ptr, int
6969
mByteBuffer = buf;
7070
mBytePtr = ptr;
7171
mByteBufferEnd = len;
72-
mRecycleBuffer = recycleBuffer;
72+
mRecycleBuffer = recycleBuffer;
7373
}
7474

7575
/*
@@ -189,19 +189,17 @@ protected final int readBytesAt(int offset)
189189
*/
190190
public final void freeBuffers()
191191
{
192-
/* 11-Apr-2005, TSa: Ok, we can release the buffer now, to be
193-
* recycled by the next stream reader instantiated by this
194-
* thread (if any).
195-
*/
196-
if (mRecycleBuffer) {
197-
byte[] buf = mByteBuffer;
198-
if (buf != null) {
199-
mByteBuffer = null;
200-
if (mConfig != null) {
201-
mConfig.freeFullBBuffer(buf);
202-
}
203-
}
204-
}
192+
// 11-Apr-2005, TSa: Ok, we can release the buffer now, to be
193+
// recycled by the next stream reader instantiated by this thread (if any).
194+
if (mRecycleBuffer) {
195+
byte[] buf = mByteBuffer;
196+
if (buf != null) {
197+
mByteBuffer = null;
198+
if (mConfig != null) {
199+
mConfig.freeFullBBuffer(buf);
200+
}
201+
}
202+
}
205203
}
206204

207205
protected void reportBounds(char[] cbuf, int start, int len)
@@ -220,8 +218,8 @@ protected void reportInvalidXml11(int value, int bytePos, int charPos)
220218
throws IOException
221219
{
222220
throw new CharConversionException("Invalid character 0x"
223-
+Integer.toHexString(value)
224-
+", can only be included in xml 1.1 using character entities (at char #"+charPos+", byte #"+bytePos+")");
221+
+Integer.toHexString(value)
222+
+", can only be included in xml 1.1 using character entities (at char #"+charPos+", byte #"+bytePos+")");
225223
}
226224
}
227225

src/main/java/com/ctc/wstx/io/UTF32Reader.java

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,26 @@
2727
public final class UTF32Reader
2828
extends BaseReader
2929
{
30-
final boolean mBigEndian;
30+
protected final boolean mBigEndian;
3131

32-
boolean mXml11;
32+
protected boolean mXml11;
3333

3434
/**
3535
* Although input is fine with full Unicode set, Java still uses
3636
* 16-bit chars, so we may have to split high-order chars into
3737
* surrogate pairs.
3838
*/
39-
char mSurrogate = NULL_CHAR;
39+
protected char mSurrogate = NULL_CHAR;
4040

4141
/**
4242
* Total read character count; used for error reporting purposes
4343
*/
44-
int mCharCount = 0;
44+
protected int mCharCount = 0;
4545

4646
/**
4747
* Total read byte count; used for error reporting purposes
4848
*/
49-
int mByteCount = 0;
49+
protected int mByteCount = 0;
5050

5151
/*
5252
////////////////////////////////////////
@@ -55,8 +55,7 @@ public final class UTF32Reader
5555
*/
5656

5757
public UTF32Reader(ReaderConfig cfg, InputStream in, byte[] buf, int ptr, int len,
58-
boolean recycleBuffer,
59-
boolean isBigEndian)
58+
boolean recycleBuffer, boolean isBigEndian)
6059
{
6160
super(cfg, in, buf, ptr, len, recycleBuffer);
6261
mBigEndian = isBigEndian;
@@ -97,24 +96,32 @@ public int read(char[] cbuf, int start, int len) throws IOException
9796
mSurrogate = NULL_CHAR;
9897
// No need to load more, already got one char
9998
} else {
100-
/* Note: we'll try to avoid blocking as much as possible. As a
101-
* result, we only need to get 4 bytes for a full char.
102-
*/
103-
int left = (mByteBufferEnd - mBytePtr);
99+
// Note: we'll try to avoid blocking as much as possible. As a
100+
// result, we only need to get 4 bytes for a full char.
101+
final int left = (mByteBufferEnd - mBytePtr);
104102
if (left < 4) {
105103
if (!loadMore(left)) { // (legal) EOF?
106-
return -1;
104+
// Ok if (but only if!) was at boundary
105+
if (left == 0) {
106+
return -1;
107+
}
108+
reportUnexpectedEOF(mByteBufferEnd - mBytePtr, 4);
107109
}
108110
}
109111
}
110112

111-
byte[] buf = mByteBuffer;
113+
final byte[] buf = mByteBuffer;
114+
// 06-Apr-2021, tatu: Must ensure we don't try to read past buffer end:
115+
final int lastValidInputStart = (mByteBufferEnd - 4);
112116

113117
main_loop:
114118
while (outPtr < len) {
115119
int ptr = mBytePtr;
116120
int ch;
117121

122+
if (mBytePtr > lastValidInputStart) {
123+
break;
124+
}
118125
if (mBigEndian) {
119126
ch = (buf[ptr] << 24) | ((buf[ptr+1] & 0xFF) << 16)
120127
| ((buf[ptr+2] & 0xFF) << 8) | (buf[ptr+3] & 0xFF);
@@ -162,9 +169,6 @@ public int read(char[] cbuf, int start, int len) throws IOException
162169
}
163170
}
164171
cbuf[outPtr++] = (char) ch;
165-
if (mBytePtr >= mByteBufferEnd) {
166-
break main_loop;
167-
}
168172
}
169173

170174
len = outPtr - start;
@@ -185,8 +189,8 @@ private void reportUnexpectedEOF(int gotBytes, int needed)
185189
int charPos = mCharCount;
186190

187191
throw new CharConversionException("Unexpected EOF in the middle of a 4-byte UTF-32 char: got "
188-
+gotBytes+", needed "+needed
189-
+", at char #"+charPos+", byte #"+bytePos+")");
192+
+gotBytes+", needed "+needed
193+
+", at char #"+charPos+", byte #"+bytePos+")");
190194
}
191195

192196
private void reportInvalid(int value, int offset, String msg)
@@ -196,8 +200,8 @@ private void reportInvalid(int value, int offset, String msg)
196200
int charPos = mCharCount + offset;
197201

198202
throw new CharConversionException("Invalid UTF-32 character 0x"
199-
+Integer.toHexString(value)
200-
+msg+" at char #"+charPos+", byte #"+bytePos+")");
203+
+Integer.toHexString(value)
204+
+msg+" at char #"+charPos+", byte #"+bytePos+")");
201205
}
202206

203207
/**
@@ -213,20 +217,18 @@ private boolean loadMore(int available)
213217

214218
// Bytes that need to be moved to the beginning of buffer?
215219
if (available > 0) {
216-
/* 11-Nov-2008, TSa: can only move if we own the buffer; otherwise
217-
* we are stuck with the data.
218-
*/
220+
// 11-Nov-2008, TSa: can only move if we own the buffer; otherwise
221+
// we are stuck with the data.
219222
if (mBytePtr > 0 && canModifyBuffer()) {
220223
for (int i = 0; i < available; ++i) {
221224
mByteBuffer[i] = mByteBuffer[mBytePtr+i];
222225
}
223226
mBytePtr = 0;
224-
mByteBufferEnd = available;
227+
mByteBufferEnd = available;
225228
}
226229
} else {
227-
/* Ok; here we can actually reasonably expect an EOF,
228-
* so let's do a separate read right away:
229-
*/
230+
// Ok; here we can actually reasonably expect an EOF,
231+
// so let's do a separate read right away:
230232
int count = readBytes();
231233
if (count < 1) {
232234
if (count < 0) { // -1
@@ -238,10 +240,8 @@ private boolean loadMore(int available)
238240
}
239241
}
240242

241-
/* Need at least 4 bytes; if we don't get that many, it's an
242-
* error.
243-
*/
244-
while (mByteBufferEnd < 4) {
243+
// Need at least 4 bytes; if we don't get that many, it's an error.
244+
while ((mByteBufferEnd - mBytePtr) < 4) {
245245
int count = readBytesAt(mByteBufferEnd);
246246
if (count < 1) {
247247
if (count < 0) { // -1, EOF... no good!
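wstxtest/fuzz/FuzzXXX_32969_UTF32ReadTest.java (new test file; name inferred from the package and class declared in the hunk below)

Lines changed: 56 additions & 0 deletions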
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package wstxtest.fuzz;
2+
3+
import java.io.*;
4+
5+
import javax.xml.stream.*;
6+
7+
import org.codehaus.stax2.io.Stax2ByteArraySource;
8+
9+
import wstxtest.stream.BaseStreamTest;
10+
11+
import com.ctc.wstx.exc.WstxIOException;
12+
import com.ctc.wstx.stax.WstxInputFactory;
13+
14+
//[woodstox-core#125]: UTF-32 decoding issue
15+
public class FuzzXXX_32969_UTF32ReadTest extends BaseStreamTest
16+
{
17+
private final byte[] DOC = readResource("/fuzz/fuzz-32969.xml");
18+
19+
private final WstxInputFactory STAX_F = getWstxInputFactory();
20+
{
21+
try {
22+
setLazyParsing(STAX_F, false);
23+
} catch (Exception e) {
24+
throw new Error(e);
25+
}
26+
}
27+
28+
//[woodstox-core#125]: InputStream
29+
public void testIssue124InputStream() throws Exception
30+
{
31+
XMLStreamReader sr = STAX_F.createXMLStreamReader(new ByteArrayInputStream(DOC));
32+
try {
33+
streamThrough(sr);
34+
fail("Should not pass");
35+
} catch (WstxIOException e) {
36+
verifyException(e, "Unexpected EOF in the middle of a 4-byte UTF-32 char");
37+
}
38+
sr.close();
39+
}
40+
41+
//[woodstox-core#125]: byte[] input
42+
public void testIssue465Stax2ByteArray() throws Exception
43+
{
44+
// Then "native" Byte array
45+
Stax2ByteArraySource src = new Stax2ByteArraySource(DOC, 0, DOC.length);
46+
XMLStreamReader sr = STAX_F.createXMLStreamReader(src);
47+
try {
48+
streamThrough(sr);
49+
fail("Should not pass");
50+
} catch (WstxIOException e) {
51+
verifyException(e, "Unexpected EOF in the middle of a 4-byte UTF-32 char");
52+
}
53+
sr.close();
54+
}
55+
}
56+
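fuzz/fuzz-32969.xml (presumably the binary test resource that the test above loads via readResource)

15.6 KB
Binary file not shown.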

0 commit comments

Comments
 (0)