apache · theosib-amazon · Apr 25, 2022 · Apr 25, 2022 · Apr 25, 2022 · Apr 26, 2022
diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/ByteBufferInputStream.java b/parquet-common/src/main/java/org/apache/parquet/bytes/ByteBufferInputStream.java
@@ -99,11 +99,7 @@ public long position() {
   }
 
   public void skipFully(long n) throws IOException {
-    long skipped = skip(n);
-    if (skipped < n) {
-      throw new EOFException(
-          "Not enough bytes to skip: " + skipped + " < " + n);
-    }
+    delegate.skipFully(n);
   }
 
   public int read(ByteBuffer out) {
@@ -119,15 +115,15 @@ public List<ByteBuffer> sliceBuffers(long length) throws EOFException {
   }
 
   public ByteBufferInputStream sliceStream(long length) throws EOFException {
-    return ByteBufferInputStream.wrap(sliceBuffers(length));
+    return delegate.sliceStream(length);
   }
 
   public List<ByteBuffer> remainingBuffers() {
     return delegate.remainingBuffers();
   }
 
   public ByteBufferInputStream remainingStream() {
-    return ByteBufferInputStream.wrap(remainingBuffers());
+    return delegate.remainingStream();
   }
 
   public int read() throws IOException {
@@ -138,6 +134,18 @@ public int read(byte[] b, int off, int len) throws IOException {
     return delegate.read(b, off, len);
   }
 
+  public int read(byte[] b) throws IOException {
+    return read(b, 0, b.length);
+  }
+
+  public void readFully(byte[] b) throws IOException {
+    readFully(b, 0, b.length);
+  }
+
+  public void readFully(byte b[], int off, int len) throws IOException {
+    delegate.readFully(b, off, len);
+  }
+
   public long skip(long n) {
     return delegate.skip(n);
   }
@@ -157,4 +165,80 @@ public void reset() throws IOException {
   public boolean markSupported() {
     return delegate.markSupported();
   }
+
+  public boolean readBoolean() throws IOException {
+    return readByte() != 0;
+  }
+
+  public byte readByte() throws IOException {
+    return delegate.readByte();
+  }
+
+  public int readUnsignedByte() throws IOException {
+    return delegate.readUnsignedByte();
+  }
+
+  public short readShort() throws IOException {
+    return delegate.readShort();
+  }
+
+  public int readUnsignedShort() throws IOException {
+    return delegate.readUnsignedShort();
+  }
+
+  public int readInt() throws IOException {
+    return delegate.readInt();
+  }
+
+  public long readLong() throws IOException {
+    return delegate.readLong();
+  }
+
+  public float readFloat() throws IOException {
+    return Float.intBitsToFloat(readInt());
+  }
+
+  public double readDouble() throws IOException {
+    return Double.longBitsToDouble(readLong());
+  }
+
+  public int readIntLittleEndianOnThreeBytes() throws IOException {
+    int ch1 = readUnsignedByte();
+    int ch2 = readUnsignedByte();
+    int ch3 = readUnsignedByte();
+    return ((ch3 << 16) + (ch2 << 8) + (ch1 << 0));
+  }
+
+  public int readIntLittleEndianPaddedOnBitWidth(int bitWidth)
+    throws IOException {
+
+    int bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
+    switch (bytesWidth) {
+      case 0:
+        return 0;
+      case 1:
+        return readUnsignedByte();
+      case 2:
+        return readUnsignedShort();
+      case 3:
+        return readIntLittleEndianOnThreeBytes();
+      case 4:
+        return readInt();
+      default:
+        throw new IOException(
+          String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
+    }
+  }
+
+  public int readUnsignedVarInt() throws IOException {
+    int value = 0;
+    int i = 0;
+    int b;
+    while (((b = readUnsignedByte()) & 0x80) != 0) {
+      value |= (b & 0x7F) << i;
+      i += 7;
+    }
+    return value | (b << i);
+  }
+
 }
diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/MultiBufferInputStream.java b/parquet-common/src/main/java/org/apache/parquet/bytes/MultiBufferInputStream.java
@@ -19,6 +19,7 @@
 
 package org.apache.parquet.bytes;
 
+import org.apache.parquet.ShouldNeverHappenException;
 import java.io.EOFException;
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -27,6 +28,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
+import java.nio.BufferUnderflowException;
 
 class MultiBufferInputStream extends ByteBufferInputStream {
   private static final ByteBuffer EMPTY = ByteBuffer.allocate(0);
@@ -89,6 +91,15 @@ public long skip(long n) {
     return bytesSkipped;
   }
 
+  @Override
+  public void skipFully(long n) throws IOException {
+    long remaining = length - position; // bytes still unread, not the total stream length
+    if (n > 0 && (current == null || n > remaining)) {
+      throw new EOFException("Not enough bytes to skip: " + remaining + " < " + n);
+    }
+    skip(n);
+  }
+
   @Override
   public int read(ByteBuffer out) {
     int len = out.remaining();
@@ -193,6 +204,10 @@ public List<ByteBuffer> sliceBuffers(long len) throws EOFException {
     return buffers;
   }
 
+  public ByteBufferInputStream sliceStream(long length) throws EOFException {
+    return ByteBufferInputStream.wrap(sliceBuffers(length));
+  }
+
   @Override
   public List<ByteBuffer> remainingBuffers() {
     if (position >= length) {
@@ -208,6 +223,10 @@ public List<ByteBuffer> remainingBuffers() {
     }
   }
 
+  public ByteBufferInputStream remainingStream() {
+    return ByteBufferInputStream.wrap(remainingBuffers());
+  }
+
   @Override
   public int read(byte[] bytes, int off, int len) {
     if (len <= 0) {
@@ -238,27 +257,38 @@ public int read(byte[] bytes, int off, int len) {
   }
 
   @Override
-  public int read(byte[] bytes) {
-    return read(bytes, 0, bytes.length);
-  }
+  public void readFully(byte[] bytes, int off, int len) throws IOException {
+    if (len <= 0) {
+      if (len < 0) {
+        throw new IndexOutOfBoundsException("Read length must be greater than 0: " + len);
+      }
+
+      return;
+    }
 
-  @Override
-  public int read() throws IOException {
-    if (current == null) {
+    if (current == null || len > length - position) { // bound by unread bytes, not total length
       throw new EOFException();
     }
 
-    while (true) {
+    int bytesRead = 0;
+    while (bytesRead < len) {
       if (current.remaining() > 0) {
-        this.position += 1;
-        return current.get() & 0xFF; // as unsigned
+        int bytesToRead = Math.min(len - bytesRead, current.remaining());
+        current.get(bytes, off + bytesRead, bytesToRead);
+        bytesRead += bytesToRead;
+        this.position += bytesToRead;
       } else if (!nextBuffer()) {
         // there are no more buffers
-        throw new EOFException();
+        throw new ShouldNeverHappenException();
       }
     }
   }
 
+  @Override
+  public int read() throws IOException {
+    return readUnsignedByte();
+  }
+
   @Override
   public int available() {
     long remaining = length - position;
@@ -313,6 +343,8 @@ private boolean nextBuffer() {
     }
 
     this.current = iterator.next().duplicate();
+    // Have to put the buffer in little endian mode, because it defaults to big endian
+    this.current.order(java.nio.ByteOrder.LITTLE_ENDIAN);
 
     if (mark >= 0) {
       if (position < markLimit) {
@@ -379,4 +411,120 @@ public void remove() {
       second.remove();
     }
   }
+
+  @Override
+  public byte readByte() throws IOException {
+    return (byte) readUnsignedByte();
+  }
+
+  @Override
+  public int readUnsignedByte() throws IOException {
+    if (current == null) {
+      throw new EOFException();
+    }
+    while (true) {
+      try {
+        int value = current.get() & 0xFF; // mask so the byte is returned as 0..255
+        this.position += 1; // count the byte only after the read succeeded, so EOF leaves position intact
+        return value;
+      } catch (BufferUnderflowException e) {
+        if (!nextBuffer()) {
+          // there are no more buffers
+          throw new EOFException();
+        }
+      }
+    }
+  }
+
+  /**
+   * Assembles a little-endian 16-bit value from two single-byte reads; used when it may span buffers.
+   * @return the two bytes combined as an int in the range 0..65535, first byte least significant
+   * @throws IOException propagated from readUnsignedByte(), e.g. EOFException when input runs out
+   */
+  private int getShortSlow() throws IOException {
+    int c0 = readUnsignedByte();
+    int c1 = readUnsignedByte();
+    return ((c0 << 0) + (c1 << 8));
+  }
+
+  public short readShort() throws IOException {
+    if (current == null) {
+      throw new EOFException();
+    }
+
+    if (current.remaining() >= Short.BYTES) {
+      // Fast path: both bytes sit in the current buffer, so one getShort() call reads them
+      this.position += Short.BYTES;
+      return current.getShort();
+    } else {
+      // Slow path: fewer than two bytes remain here, so assemble the short byte by byte
+      return (short) getShortSlow();
+    }
+  }
+
+  public int readUnsignedShort() throws IOException {
+    return readShort() & 0xffff;
+  }
+
+  /**
+   * Assembles a little-endian int from four single-byte reads; used when the value may span buffers.
+   * @return the four bytes combined into an int, first byte least significant
+   * @throws IOException propagated from readUnsignedByte(), e.g. EOFException when input runs out
+   */
+  private int getIntSlow() throws IOException {
+    int c0 = readUnsignedByte();
+    int c1 = readUnsignedByte();
+    int c2 = readUnsignedByte();
+    int c3 = readUnsignedByte();
+    return ((c0 << 0) + (c1 << 8)) + ((c2 << 16) + (c3 << 24));
+  }
+
+  @Override
+  public int readInt() throws IOException {
+    if (current == null) {
+      throw new EOFException();
+    }
+
+    if (current.remaining() >= Integer.BYTES) {
+      // Fast path: all four bytes sit in the current buffer, so one getInt() call reads them
+      this.position += Integer.BYTES;
+      return current.getInt();
+    } else {
+      // Slow path: fewer than four bytes remain here, so assemble the int byte by byte
+      return getIntSlow();
+    }
+  }
+
+  /**
+   * Assembles a little-endian long from eight single-byte reads; used when the value may span buffers.
+   * @return the eight bytes combined into a long, first byte least significant
+   * @throws IOException propagated from readUnsignedByte(), e.g. EOFException when input runs out
+   */
+  private long getLongSlow() throws IOException {
+    long ch0 = (long) readUnsignedByte() << 0;
+    long ch1 = (long) readUnsignedByte() << 8;
+    long ch2 = (long) readUnsignedByte() << 16;
+    long ch3 = (long) readUnsignedByte() << 24;
+    long ch4 = (long) readUnsignedByte() << 32;
+    long ch5 = (long) readUnsignedByte() << 40;
+    long ch6 = (long) readUnsignedByte() << 48;
+    long ch7 = (long) readUnsignedByte() << 56;
+    return ((ch0 + ch1) + (ch2 + ch3)) + ((ch4 + ch5) + (ch6 + ch7));
+  }
+
+  @Override
+  public long readLong() throws IOException {
+    if (current == null) {
+      throw new EOFException();
+    }
+
+    if (current.remaining() >= Long.BYTES) {
+      // Fast path: all eight bytes sit in the current buffer, so one getLong() call reads them
+      this.position += Long.BYTES;
+      return current.getLong();
+    } else {
+      // Slow path: fewer than eight bytes remain here, so assemble the long byte by byte
+      return getLongSlow();
+    }
+  }
 }