apache · theosib-amazon · Apr 25, 2022 · Apr 25, 2022 · Apr 25, 2022 · Apr 26, 2022
diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/ByteBufferInputStream.java b/parquet-common/src/main/java/org/apache/parquet/bytes/ByteBufferInputStream.java
@@ -28,6 +28,16 @@
 
 import org.apache.parquet.ShouldNeverHappenException;
 
+/*
+Changes implemented:
+All of the functionality of LittleEndianDataInputStream has been merged into ByteBufferInputStream and its child
+classes. This has resulted in measurable performance improvements for the following reasons:
+- Elimination of at least one layer of abstraction / method call overhead
+- Support for intrinsics in readInt, readLong, etc.
+- Elimination of the reliance on the JIT deciding whether or not to inline methods from BytesUtils and
+  the InputStream.read() that is called by BytesUtils.
+ */
+
 public class ByteBufferInputStream extends InputStream {
 
   // Used to maintain the deprecated behavior of instantiating ByteBufferInputStream directly
@@ -49,6 +59,19 @@ public static ByteBufferInputStream wrap(List<ByteBuffer> buffers) {
     }
   }
 
+  /**
+   * Creates a stream by constructing a {@link SingleBufferInputStream} with the same three arguments.
+   *
+   * @param buffer the buffer to read from
+   * @param offset passed through unchanged; presumably the start index within {@code buffer} (TODO confirm)
+   * @param count passed through unchanged; presumably the number of readable bytes (TODO confirm)
+   * @return a new {@link SingleBufferInputStream}
+   */
+  public static ByteBufferInputStream wrap(ByteBuffer buffer, int offset, int count) {
+    return new SingleBufferInputStream(buffer, offset, count);
+  }
+
+  /**
+   * Creates a stream over a byte array by constructing a {@link SingleBufferInputStream} from it.
+   *
+   * @param buf the bytes to read
+   * @return a new {@link SingleBufferInputStream}
+   */
+  public static ByteBufferInputStream wrap(byte[] buf) {
+    return new SingleBufferInputStream(buf);
+  }
+
+  /**
+   * Creates a stream by constructing a {@link SingleBufferInputStream} with the same three arguments.
+   *
+   * @param buf the bytes to read
+   * @param start passed through unchanged; presumably the first index to read (TODO confirm)
+   * @param length passed through unchanged; presumably the number of readable bytes (TODO confirm)
+   * @return a new {@link SingleBufferInputStream}
+   */
+  public static ByteBufferInputStream wrap(byte[] buf, int start, int length) {
+    return new SingleBufferInputStream(buf, start, length);
+  }
+
+
   ByteBufferInputStream() {
+    // No delegate is installed on this path, so the forwarding methods of this class would hit a
+    // null delegate; presumably only subclasses that override them use it (TODO confirm).
     delegate = null;
   }
@@ -74,11 +97,26 @@ public ByteBufferInputStream(ByteBuffer buffer) {
    */
   @Deprecated
   public ByteBufferInputStream(ByteBuffer buffer, int offset, int count) {
+    // Keep the duplicate/position/slice/limit sequence: "TestDeprecatedBufferInputStream" needs it.
+    // NOTE(review): delegating to wrap(buffer, offset, count) would probably be faster, but it
+    // has not been shown to keep that test passing.
     ByteBuffer temp = buffer.duplicate();
     temp.position(offset);
     ByteBuffer byteBuf = temp.slice();
     byteBuf.limit(count);
     delegate = wrap(byteBuf);
+  }
+
+  /**
+   * Creates a stream whose delegate is the stream returned by {@link #wrap(byte[])}.
+   *
+   * @param inBuf the bytes to read
+   */
+  public ByteBufferInputStream(byte[] inBuf) {
+    delegate = wrap(inBuf);
+  }
+
+  /**
+   * Creates a stream whose delegate is the stream returned by {@link #wrap(byte[], int, int)}.
+   *
+   * @param inBuf the bytes to read
+   * @param start passed through to {@code wrap}; presumably the first index to read (TODO confirm)
+   * @param length passed through to {@code wrap}; presumably the number of readable bytes (TODO confirm)
+   */
+  public ByteBufferInputStream(byte[] inBuf, int start, int length) {
+    delegate = wrap(inBuf, start, length);
+  }
+
+  /**
+   * Creates a stream whose delegate is the stream returned by {@link #wrap(List)}.
+   *
+   * @param inBufs the buffers to read, in order
+   */
+  public ByteBufferInputStream(List<ByteBuffer> inBufs) {
+    delegate = wrap(inBufs);
   }
 
   /**
@@ -98,12 +136,12 @@ public long position() {
     return delegate.position();
   }
 
+  /**
+   * Moves the read position. This implementation does not support repositioning and always throws.
+   * NOTE(review): presumably concrete stream classes override it -- confirm.
+   *
+   * @param pos the requested position
+   * @throws UnsupportedOperationException always, naming the runtime class that rejected the call
+   */
+  public void position(int pos) {
+    throw new UnsupportedOperationException(
+        "position(int) is not supported by " + getClass().getName());
+  }
+
   public void skipFully(long n) throws IOException {
-    long skipped = skip(n);
-    if (skipped < n) {
-      throw new EOFException(
-          "Not enough bytes to skip: " + skipped + " < " + n);
-    }
+    // Forward to the delegate instead of checking skip()'s result here.
+    // NOTE(review): presumably the delegate still raises EOFException on a short skip -- confirm.
+    delegate.skipFully(n);
   }
 
   public int read(ByteBuffer out) {
@@ -119,15 +157,20 @@ public List<ByteBuffer> sliceBuffers(long length) throws EOFException {
   }
 
   public ByteBufferInputStream sliceStream(long length) throws EOFException {
-    return ByteBufferInputStream.wrap(sliceBuffers(length));
+    // The delegate builds the sliced stream itself; this class no longer re-wraps sliceBuffers().
+    return delegate.sliceStream(length);
   }
 
   public List<ByteBuffer> remainingBuffers() {
+    // Forwarded unchanged to the delegate stream.
     return delegate.remainingBuffers();
   }
 
   public ByteBufferInputStream remainingStream() {
-    return ByteBufferInputStream.wrap(remainingBuffers());
+    // The delegate produces the stream itself; this class no longer re-wraps remainingBuffers().
+    return delegate.remainingStream();
+  }
+
+  /**
+   * Returns whatever the delegate's {@code duplicate()} returns.
+   * NOTE(review): presumably an independent view over the same bytes -- confirm against the delegate.
+   */
+  public ByteBufferInputStream duplicate() {
+    return delegate.duplicate();
   }
 
   public int read() throws IOException {
@@ -138,14 +181,34 @@ public int read(byte[] b, int off, int len) throws IOException {
     return delegate.read(b, off, len);
   }
 
+  /**
+   * Reads into the whole of {@code b}; shorthand for {@code read(b, 0, b.length)}.
+   *
+   * @param b destination array
+   * @return the value returned by {@link #read(byte[], int, int)}
+   * @throws IOException if the underlying read fails
+   */
+  @Override
+  public int read(byte[] b) throws IOException {
+    return read(b, 0, b.length);
+  }
+
+  /**
+   * Fills the whole of {@code b}; shorthand for {@code readFully(b, 0, b.length)}.
+   *
+   * @param b destination array
+   * @throws IOException if the underlying read fails
+   */
+  public void readFully(byte[] b) throws IOException {
+    readFully(b, 0, b.length);
+  }
+
+  /**
+   * Reads into a region of {@code b} by forwarding to the delegate's {@code readFully}.
+   * NOTE(review): presumably fails rather than returning a short count when fewer than
+   * {@code len} bytes remain -- confirm against the delegate.
+   *
+   * @param b destination array
+   * @param off first index of {@code b} to write
+   * @param len number of bytes requested
+   * @throws IOException if the delegate's read fails
+   */
+  public void readFully(byte[] b, int off, int len) throws IOException {
+    delegate.readFully(b, off, len);
+  }
+
   public long skip(long n) {
+    // Forwarded unchanged to the delegate; the delegate's return value is passed back as is.
     return delegate.skip(n);
   }
 
+  /**
+   * Skips bytes by calling {@link #skip(long)} with {@code n}.
+   *
+   * @param n the number of bytes to skip
+   * @return the result of {@code skip(n)}, narrowed to an {@code int}
+   */
+  public int skipBytes(int n) {
+    final long skipped = skip(n);
+    return (int) skipped;
+  }
+
   public int available() {
+    // Forwarded unchanged to the delegate stream.
     return delegate.available();
   }
 
+  /**
+   * Alias for {@link #available()}.
+   *
+   * @return the value returned by {@code available()}
+   */
+  public int remaining() {
+    return available();
+  }
+
   public void mark(int readlimit) {
+    // Forwarded unchanged to the delegate stream.
     delegate.mark(readlimit);
   }
@@ -157,4 +220,83 @@ public void reset() throws IOException {
   public boolean markSupported() {
+    // Mark/reset support is whatever the delegate stream reports.
     return delegate.markSupported();
   }
+
+  /**
+   * Does nothing; in particular the delegate is not closed.
+   *
+   * @throws IOException never thrown by this implementation
+   */
+  @Override
+  public void close() throws IOException {
+    // No-op: this implementation releases nothing.
+  }
+
+  /**
+   * Reads one byte with {@link #readByte()} and interprets it as a boolean.
+   *
+   * @return {@code true} if the byte is non-zero, {@code false} if it is zero
+   * @throws IOException if reading the byte fails
+   */
+  public boolean readBoolean() throws IOException {
+    return readByte() != 0;
+  }
+
+  /**
+   * Reads one signed byte by forwarding to the delegate.
+   *
+   * @return the value returned by the delegate's {@code readByte()}
+   * @throws IOException if the delegate's read fails
+   */
+  public byte readByte() throws IOException {
+    return delegate.readByte();
+  }
+
+  /**
+   * Reads one byte as an {@code int} by forwarding to the delegate.
+   * NOTE(review): presumably the result is in the range 0..255 -- confirm against the delegate.
+   *
+   * @return the value returned by the delegate's {@code readUnsignedByte()}
+   * @throws IOException if the delegate's read fails
+   */
+  public int readUnsignedByte() throws IOException {
+    return delegate.readUnsignedByte();
+  }
+
+  /**
+   * Reads a {@code short} by forwarding to the delegate.
+   * NOTE(review): byte order is decided by the delegate; presumably little-endian -- confirm.
+   *
+   * @return the value returned by the delegate's {@code readShort()}
+   * @throws IOException if the delegate's read fails
+   */
+  public short readShort() throws IOException {
+    return delegate.readShort();
+  }
+
+  /**
+   * Reads an unsigned 16-bit value as an {@code int} by forwarding to the delegate.
+   * NOTE(review): byte order is decided by the delegate; presumably little-endian -- confirm.
+   *
+   * @return the value returned by the delegate's {@code readUnsignedShort()}
+   * @throws IOException if the delegate's read fails
+   */
+  public int readUnsignedShort() throws IOException {
+    return delegate.readUnsignedShort();
+  }
+
+  /**
+   * Reads an {@code int} by forwarding to the delegate.
+   * NOTE(review): byte order is decided by the delegate; presumably little-endian -- confirm.
+   *
+   * @return the value returned by the delegate's {@code readInt()}
+   * @throws IOException if the delegate's read fails
+   */
+  public int readInt() throws IOException {
+    return delegate.readInt();
+  }
+
+  /**
+   * Reads a {@code long} by forwarding to the delegate.
+   * NOTE(review): byte order is decided by the delegate; presumably little-endian -- confirm.
+   *
+   * @return the value returned by the delegate's {@code readLong()}
+   * @throws IOException if the delegate's read fails
+   */
+  public long readLong() throws IOException {
+    return delegate.readLong();
+  }
+
+  /**
+   * Reads an {@code int} with {@link #readInt()} and reinterprets its bits as a {@code float}.
+   *
+   * @return the float whose bit pattern is the int that was read
+   * @throws IOException if reading the int fails
+   */
+  public float readFloat() throws IOException {
+    final int rawBits = readInt();
+    return Float.intBitsToFloat(rawBits);
+  }
+
+  /**
+   * Reads a {@code long} with {@link #readLong()} and reinterprets its bits as a {@code double}.
+   *
+   * @return the double whose bit pattern is the long that was read
+   * @throws IOException if reading the long fails
+   */
+  public double readDouble() throws IOException {
+    final long rawBits = readLong();
+    return Double.longBitsToDouble(rawBits);
+  }
+
+  /**
+   * Reads three unsigned bytes and combines them into an {@code int}, treating the first byte
+   * read as the least significant.
+   *
+   * @return the assembled value
+   * @throws IOException if reading any of the three bytes fails
+   */
+  public int readIntLittleEndianOnThreeBytes() throws IOException {
+    // The three reads must stay in this order: low byte first.
+    final int low = readUnsignedByte();
+    final int middle = readUnsignedByte();
+    final int high = readUnsignedByte();
+    return low + (middle << 8) + (high << 16);
+  }
+
+  /**
+   * Reads an integer stored on the number of bytes that
+   * {@code BytesUtils.paddedByteCountFromBits(bitWidth)} reports for {@code bitWidth}.
+   * Zero bytes yields 0 without reading; one to four bytes are read with the matching
+   * fixed-width reader of this class.
+   *
+   * @param bitWidth the bit width of the stored value
+   * @return the value read, or 0 when no bytes are needed
+   * @throws IOException if a read fails, or if the computed byte count is not between 0 and 4
+   */
+  public int readIntLittleEndianPaddedOnBitWidth(int bitWidth)
+    throws IOException {
+
+    final int byteCount = BytesUtils.paddedByteCountFromBits(bitWidth);
+    if (byteCount == 0) {
+      return 0;
+    }
+    if (byteCount == 1) {
+      return readUnsignedByte();
+    }
+    if (byteCount == 2) {
+      return readUnsignedShort();
+    }
+    if (byteCount == 3) {
+      return readIntLittleEndianOnThreeBytes();
+    }
+    if (byteCount == 4) {
+      return readInt();
+    }
+    // Any other byte count cannot be represented in an int.
+    throw new IOException(
+      String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
+  }
+
+  /**
+   * Reads a variable-length unsigned int. Each byte contributes its low seven bits, least
+   * significant group first; a set high bit (0x80) means another byte follows.
+   *
+   * @return the decoded value
+   * @throws IOException if reading a byte fails
+   */
+  public int readUnsignedVarInt() throws IOException {
+    int result = 0;
+    int shift = 0;
+    int next = readUnsignedByte();
+    while ((next & 0x80) != 0) {
+      // Continuation byte: keep its seven payload bits and move on to the next group.
+      result |= (next & 0x7F) << shift;
+      shift += 7;
+      next = readUnsignedByte();
+    }
+    // Final byte has its high bit clear, so it can be merged without masking.
+    return result | (next << shift);
+  }
+
 }
diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/LittleEndianDataInputStream.java b/parquet-common/src/main/java/org/apache/parquet/bytes/LittleEndianDataInputStream.java
@@ -25,6 +25,7 @@
 /**
  * Based on DataInputStream but little endian and without the String/char methods
+ *
+ * @deprecated the same read methods are now available on {@link ByteBufferInputStream}
  */
+@Deprecated
 public final class LittleEndianDataInputStream extends InputStream {
 
   private final InputStream in;