2727public final class UTF32Reader
2828 extends BaseReader
2929{
30- final boolean mBigEndian ;
30+ protected final boolean mBigEndian ;
3131
32- boolean mXml11 ;
32+ protected boolean mXml11 ;
3333
3434 /**
3535 * Although input is fine with full Unicode set, Java still uses
3636 * 16-bit chars, so we may have to split high-order chars into
3737 * surrogate pairs.
3838 */
39- char mSurrogate = NULL_CHAR ;
39+ protected char mSurrogate = NULL_CHAR ;
4040
4141 /**
4242 * Total read character count; used for error reporting purposes
4343 */
44- int mCharCount = 0 ;
44+ protected int mCharCount = 0 ;
4545
4646 /**
4747 * Total read byte count; used for error reporting purposes
4848 */
49- int mByteCount = 0 ;
49+ protected int mByteCount = 0 ;
5050
5151 /*
5252 ////////////////////////////////////////
@@ -55,8 +55,7 @@ public final class UTF32Reader
5555 */
5656
5757 public UTF32Reader (ReaderConfig cfg , InputStream in , byte [] buf , int ptr , int len ,
58- boolean recycleBuffer ,
59- boolean isBigEndian )
58+ boolean recycleBuffer , boolean isBigEndian )
6059 {
6160 super (cfg , in , buf , ptr , len , recycleBuffer );
6261 mBigEndian = isBigEndian ;
@@ -97,24 +96,32 @@ public int read(char[] cbuf, int start, int len) throws IOException
9796 mSurrogate = NULL_CHAR ;
9897 // No need to load more, already got one char
9998 } else {
100- /* Note: we'll try to avoid blocking as much as possible. As a
101- * result, we only need to get 4 bytes for a full char.
102- */
103- int left = (mByteBufferEnd - mBytePtr );
99+ // Note: we try to avoid blocking as much as possible, so we only
100+ // require that 4 bytes (one full char) are available before decoding.
101+ final int left = (mByteBufferEnd - mBytePtr );
104102 if (left < 4 ) {
105103 if (!loadMore (left )) { // (legal) EOF?
106- return -1 ;
104+ // EOF is legal only at a character boundary, i.e. with no leftover partial-char bytes
105+ if (left == 0 ) {
106+ return -1 ;
107+ }
108+ reportUnexpectedEOF (mByteBufferEnd - mBytePtr , 4 );
107109 }
108110 }
109111 }
110112
111- byte [] buf = mByteBuffer ;
113+ final byte [] buf = mByteBuffer ;
114+ // 06-Apr-2021, tatu: Must ensure we don't try to read past buffer end:
115+ final int lastValidInputStart = (mByteBufferEnd - 4 );
112116
113117 main_loop :
114118 while (outPtr < len ) {
115119 int ptr = mBytePtr ;
116120 int ch ;
117121
122+ if (mBytePtr > lastValidInputStart ) {
123+ break ;
124+ }
118125 if (mBigEndian ) {
119126 ch = (buf [ptr ] << 24 ) | ((buf [ptr +1 ] & 0xFF ) << 16 )
120127 | ((buf [ptr +2 ] & 0xFF ) << 8 ) | (buf [ptr +3 ] & 0xFF );
@@ -162,9 +169,6 @@ public int read(char[] cbuf, int start, int len) throws IOException
162169 }
163170 }
164171 cbuf [outPtr ++] = (char ) ch ;
165- if (mBytePtr >= mByteBufferEnd ) {
166- break main_loop ;
167- }
168172 }
169173
170174 len = outPtr - start ;
@@ -185,8 +189,8 @@ private void reportUnexpectedEOF(int gotBytes, int needed)
185189 int charPos = mCharCount ;
186190
187191 throw new CharConversionException ("Unexpected EOF in the middle of a 4-byte UTF-32 char: got "
188- +gotBytes +", needed " +needed
189- +", at char #" +charPos +", byte #" +bytePos +")" );
192+ +gotBytes +", needed " +needed
193+ +", at char #" +charPos +", byte #" +bytePos +")" );
190194 }
191195
192196 private void reportInvalid (int value , int offset , String msg )
@@ -196,8 +200,8 @@ private void reportInvalid(int value, int offset, String msg)
196200 int charPos = mCharCount + offset ;
197201
198202 throw new CharConversionException ("Invalid UTF-32 character 0x"
199- +Integer .toHexString (value )
200- +msg +" at char #" +charPos +", byte #" +bytePos +")" );
203+ +Integer .toHexString (value )
204+ +msg +" at char #" +charPos +", byte #" +bytePos +")" );
201205 }
202206
203207 /**
@@ -213,20 +217,18 @@ private boolean loadMore(int available)
213217
214218 // Bytes that need to be moved to the beginning of buffer?
215219 if (available > 0 ) {
216- /* 11-Nov-2008, TSa: can only move if we own the buffer; otherwise
217- * we are stuck with the data.
218- */
220+ // 11-Nov-2008, TSa: can only move if we own the buffer; otherwise
221+ // we are stuck with the data.
219222 if (mBytePtr > 0 && canModifyBuffer ()) {
220223 for (int i = 0 ; i < available ; ++i ) {
221224 mByteBuffer [i ] = mByteBuffer [mBytePtr +i ];
222225 }
223226 mBytePtr = 0 ;
224- mByteBufferEnd = available ;
227+ mByteBufferEnd = available ;
225228 }
226229 } else {
227- /* Ok; here we can actually reasonably expect an EOF,
228- * so let's do a separate read right away:
229- */
230+ // Ok; here we can actually reasonably expect an EOF,
231+ // so let's do a separate read right away:
230232 int count = readBytes ();
231233 if (count < 1 ) {
232234 if (count < 0 ) { // -1
@@ -238,10 +240,8 @@ private boolean loadMore(int available)
238240 }
239241 }
240242
241- /* Need at least 4 bytes; if we don't get that many, it's an
242- * error.
243- */
244- while (mByteBufferEnd < 4 ) {
243+ // Need at least 4 bytes; if we don't get that many, it's an error.
244+ while ((mByteBufferEnd - mBytePtr ) < 4 ) {
245245 int count = readBytesAt (mByteBufferEnd );
246246 if (count < 1 ) {
247247 if (count < 0 ) { // -1, EOF... no good!
0 commit comments