diff --git a/.github/config/labeler-config.yml b/.github/config/labeler-config.yml
index f968947a02e2..00a1b2948657 100644
--- a/.github/config/labeler-config.yml
+++ b/.github/config/labeler-config.yml
@@ -2,7 +2,7 @@
"tests:hive":
- lib/trino-orc/**
- lib/trino-parquet/**
- - lib/trino-rcfile/**
+ - lib/trino-hive-formats/**
- plugin/trino-hive-hadoop2/**
- plugin/trino-hive/**
- testing/trino-product-tests/**
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/FileRcFileDataSource.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java
similarity index 58%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/FileRcFileDataSource.java
rename to lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java
index 21d3188188a9..a6c9f291a518 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/FileRcFileDataSource.java
+++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java
@@ -11,78 +11,65 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.filesystem.local;
+
+import io.trino.filesystem.TrinoInput;
+import org.apache.iceberg.Files;
+import org.apache.iceberg.io.SeekableInputStream;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
+import static java.lang.Math.min;
import static java.util.Objects.requireNonNull;
-public class FileRcFileDataSource
- implements RcFileDataSource
+class LocalInput
+ implements TrinoInput
{
- private final File path;
- private final long size;
+ private final File file;
private final RandomAccessFile input;
- private long readTimeNanos;
- private long readBytes;
-
- public FileRcFileDataSource(File path)
- throws IOException
- {
- this.path = requireNonNull(path, "path is null");
- this.size = path.length();
- this.input = new RandomAccessFile(path, "r");
- }
- @Override
- public void close()
+ public LocalInput(File file)
throws IOException
{
- input.close();
- }
-
- @Override
- public long getReadBytes()
- {
- return readBytes;
+ this.file = requireNonNull(file, "file is null");
+ this.input = new RandomAccessFile(file, "r");
}
@Override
- public long getReadTimeNanos()
+ public SeekableInputStream inputStream()
{
- return readTimeNanos;
- }
-
- @Override
- public long getSize()
- {
- return size;
+ return Files.localInput(file).newStream();
}
@Override
public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
throws IOException
{
- long start = System.nanoTime();
-
input.seek(position);
input.readFully(buffer, bufferOffset, bufferLength);
+ }
- readTimeNanos += System.nanoTime() - start;
- readBytes += bufferLength;
+ @Override
+ public int readTail(byte[] buffer, int bufferOffset, int bufferLength)
+ throws IOException
+ {
+ int readSize = (int) min(file.length(), bufferLength);
+ readFully(file.length() - readSize, buffer, bufferOffset, readSize);
+ return readSize;
}
@Override
- public RcFileDataSourceId getId()
+ public void close()
+ throws IOException
{
- return new RcFileDataSourceId(path.getPath());
+ input.close();
}
@Override
public String toString()
{
- return path.getPath();
+ return file.getPath();
}
}
diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java
new file mode 100644
index 000000000000..a0475d5fc415
--- /dev/null
+++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.filesystem.local;
+
+import io.trino.filesystem.TrinoInput;
+import io.trino.filesystem.TrinoInputFile;
+
+import java.io.File;
+import java.io.IOException;
+
+import static java.util.Objects.requireNonNull;
+
+public class LocalInputFile
+ implements TrinoInputFile
+{
+ private final File file;
+
+ public LocalInputFile(File file)
+ {
+ this.file = requireNonNull(file, "file is null");
+ }
+
+ @Override
+ public TrinoInput newInput()
+ throws IOException
+ {
+ return new LocalInput(file);
+ }
+
+ @Override
+ public long length()
+ throws IOException
+ {
+ return file.length();
+ }
+
+ @Override
+ public long modificationTime()
+ throws IOException
+ {
+ return file.lastModified();
+ }
+
+ @Override
+ public boolean exists()
+ throws IOException
+ {
+ return file.exists();
+ }
+
+ @Override
+ public String location()
+ {
+ return file.getPath();
+ }
+
+ @Override
+ public String toString()
+ {
+ return file.getPath();
+ }
+}
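
A usage sketch of the new local-file classes (illustrative only; the path is hypothetical). LocalInput is package-private, so callers construct a LocalInputFile and obtain a TrinoInput from it:

import io.trino.filesystem.TrinoInput;
import io.trino.filesystem.TrinoInputFile;
import io.trino.filesystem.local.LocalInputFile;

import java.io.File;
import java.io.IOException;

class LocalInputExample
{
    static void example()
            throws IOException
    {
        TrinoInputFile inputFile = new LocalInputFile(new File("/tmp/data.orc")); // hypothetical path
        try (TrinoInput input = inputFile.newInput()) {
            // readTail reads up to bufferLength bytes from the end of the file;
            // footer-based formats use this to locate their metadata
            byte[] tail = new byte[1024];
            int tailSize = input.readTail(tail, 0, tail.length);

            // readFully reads an exact byte range at an absolute position
            byte[] magic = new byte[4];
            input.readFully(0, magic, 0, magic.length);
        }
    }
}
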
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java
similarity index 59%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java
rename to lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java
index 9e2710f9bb16..85a185ef8d56 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java
+++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java
@@ -11,57 +11,54 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.filesystem.memory;
import io.airlift.slice.Slice;
+import io.trino.filesystem.TrinoInput;
+import org.apache.iceberg.io.SeekableInputStream;
+import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
-public class MemoryRcFileDataSource
- implements RcFileDataSource
+class MemoryInput
+ implements TrinoInput
{
- private final RcFileDataSourceId id;
+ private final String location;
private final Slice data;
- private long readBytes;
- public MemoryRcFileDataSource(RcFileDataSourceId id, Slice data)
+ public MemoryInput(String location, Slice data)
{
- this.id = requireNonNull(id, "id is null");
+ this.location = requireNonNull(location, "location is null");
this.data = requireNonNull(data, "data is null");
}
@Override
- public RcFileDataSourceId getId()
+ public SeekableInputStream inputStream()
{
- return id;
+ return new MemorySeekableInputStream(data);
}
@Override
- public long getReadBytes()
+ public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
{
- return readBytes;
+ data.getBytes(toIntExact(position), buffer, bufferOffset, bufferLength);
}
@Override
- public long getReadTimeNanos()
+ public int readTail(byte[] buffer, int bufferOffset, int bufferLength)
{
- return 0;
+ int readSize = min(data.length(), bufferLength);
+ readFully(data.length() - readSize, buffer, bufferOffset, readSize);
+ return readSize;
}
@Override
- public long getSize()
- {
- return data.length();
- }
+ public void close() {}
@Override
- public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
+ public String toString()
{
- data.getBytes(toIntExact(position), buffer, bufferOffset, bufferLength);
- readBytes += bufferLength;
+ return location;
}
-
- @Override
- public void close() {}
}
diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java
new file mode 100644
index 000000000000..81b66ce986e4
--- /dev/null
+++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.filesystem.memory;
+
+import io.airlift.slice.Slice;
+import io.trino.filesystem.TrinoInput;
+import io.trino.filesystem.TrinoInputFile;
+
+import java.io.IOException;
+
+import static java.util.Objects.requireNonNull;
+
+public class MemoryInputFile
+ implements TrinoInputFile
+{
+ private final String location;
+ private final Slice data;
+
+ public MemoryInputFile(String location, Slice data)
+ {
+ this.location = requireNonNull(location, "location is null");
+ this.data = requireNonNull(data, "data is null");
+ }
+
+ @Override
+ public TrinoInput newInput()
+ throws IOException
+ {
+ return new MemoryInput(location, data);
+ }
+
+ @Override
+ public long length()
+ throws IOException
+ {
+ return data.length();
+ }
+
+ @Override
+ public long modificationTime()
+ throws IOException
+ {
+ return 0;
+ }
+
+ @Override
+ public boolean exists()
+ throws IOException
+ {
+ return true;
+ }
+
+ @Override
+ public String location()
+ {
+ return location;
+ }
+
+ @Override
+ public String toString()
+ {
+ return location;
+ }
+}
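
The in-memory counterpart is convenient in tests because it needs no disk I/O. A small sketch, with an arbitrary location string:

import io.airlift.slice.Slices;
import io.trino.filesystem.TrinoInput;
import io.trino.filesystem.TrinoInputFile;
import io.trino.filesystem.memory.MemoryInputFile;

import java.io.IOException;

class MemoryInputFileExample
{
    static void example()
            throws IOException
    {
        // back a TrinoInputFile with a Slice; "memory:///test" is an arbitrary label
        TrinoInputFile inputFile = new MemoryInputFile("memory:///test", Slices.utf8Slice("hello"));
        try (TrinoInput input = inputFile.newInput()) {
            byte[] tail = new byte[3];
            int read = input.readTail(tail, 0, tail.length); // fills tail with "llo", returns 3
        }
    }
}
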
diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java
new file mode 100644
index 000000000000..966f0eb176da
--- /dev/null
+++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.filesystem.memory;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.SliceInput;
+import org.apache.iceberg.io.SeekableInputStream;
+
+import java.io.IOException;
+
+public class MemorySeekableInputStream
+ extends SeekableInputStream
+{
+ private final SliceInput input;
+
+ public MemorySeekableInputStream(Slice data)
+ {
+ input = data.getInput();
+ }
+
+ @Override
+ public long getPos()
+ {
+ return input.position();
+ }
+
+ @Override
+ public void seek(long newPos)
+ {
+ input.setPosition(newPos);
+ }
+
+ @Override
+ public int read()
+ throws IOException
+ {
+ return input.read();
+ }
+
+ @Override
+ public int read(byte[] destination, int destinationIndex, int length)
+ {
+ return input.read(destination, destinationIndex, length);
+ }
+
+ @Override
+ public long skip(long length)
+ {
+ return input.skip(length);
+ }
+}
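
The stream contract in miniature (a sketch over a four-byte Slice): seek repositions to an absolute offset, and getPos reports the current one.

import io.airlift.slice.Slices;
import io.trino.filesystem.memory.MemorySeekableInputStream;

import java.io.IOException;

class SeekExample
{
    static void example()
            throws IOException
    {
        MemorySeekableInputStream in = new MemorySeekableInputStream(
                Slices.wrappedBuffer(new byte[] {1, 2, 3, 4}));
        in.seek(2);             // jump to absolute offset 2
        int value = in.read();  // returns 3 (the byte at offset 2)
        long pos = in.getPos(); // now 3
    }
}
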
diff --git a/lib/trino-rcfile/pom.xml b/lib/trino-hive-formats/pom.xml
similarity index 89%
rename from lib/trino-rcfile/pom.xml
rename to lib/trino-hive-formats/pom.xml
index 377a4a845c16..03d915cb137e 100644
--- a/lib/trino-rcfile/pom.xml
+++ b/lib/trino-hive-formats/pom.xml
@@ -9,15 +9,20 @@
<relativePath>../../pom.xml</relativePath>
</parent>

- <artifactId>trino-rcfile</artifactId>
- <name>trino-rcfile</name>
- <description>Trino - RCFile</description>
+ <artifactId>trino-hive-formats</artifactId>
+ <name>trino-hive-formats</name>
+ <description>Trino - Hive Formats</description>

<properties>
<air.main.basedir>${project.parent.basedir}</air.main.basedir>
</properties>

<dependencies>
+ <dependency>
+ <groupId>io.trino</groupId>
+ <artifactId>trino-filesystem</artifactId>
+ </dependency>
+
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hadoop-toolkit</artifactId>
@@ -65,6 +70,11 @@
<artifactId>joda-time</artifactId>
</dependency>

+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-api</artifactId>
+ </dependency>
+
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java
new file mode 100644
index 000000000000..a6aaae2ab218
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import org.openjdk.jol.info.ClassLayout;
+
+import java.io.Closeable;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
+import static io.airlift.slice.SizeOf.SIZE_OF_INT;
+import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
+import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
+import static java.lang.Math.toIntExact;
+
+public final class DataOutputStream
+ extends OutputStream
+ implements DataOutput
+{
+ private static final int DEFAULT_BUFFER_SIZE = 4 * 1024;
+ private static final int MINIMUM_CHUNK_SIZE = 1024;
+
+ private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(DataOutputStream.class).instanceSize());
+
+ private final OutputStream outputStream;
+
+ private final Slice slice;
+ private final byte[] buffer;
+
+ /**
+ * Offset of buffer within stream.
+ */
+ private long bufferOffset;
+ /**
+ * Current position for writing in buffer.
+ */
+ private int bufferPosition;
+
+ public DataOutputStream(OutputStream outputStream)
+ {
+ this(outputStream, DEFAULT_BUFFER_SIZE);
+ }
+
+ public DataOutputStream(OutputStream outputStream, int bufferSize)
+ {
+ checkArgument(bufferSize >= MINIMUM_CHUNK_SIZE, "minimum buffer size of " + MINIMUM_CHUNK_SIZE + " required");
+ if (outputStream == null) {
+ throw new NullPointerException("outputStream is null");
+ }
+
+ this.outputStream = outputStream;
+ this.buffer = new byte[bufferSize];
+ this.slice = Slices.wrappedBuffer(buffer);
+ }
+
+ @Override
+ public void flush()
+ throws IOException
+ {
+ flushBufferToOutputStream();
+ outputStream.flush();
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ try (Closeable ignored = outputStream) {
+ flushBufferToOutputStream();
+ }
+ }
+
+ public long longSize()
+ {
+ return bufferOffset + bufferPosition;
+ }
+
+ public long getRetainedSize()
+ {
+ return slice.getRetainedSize() + INSTANCE_SIZE;
+ }
+
+ @Override
+ public void writeBoolean(boolean value)
+ throws IOException
+ {
+ writeByte(value ? 1 : 0);
+ }
+
+ @Override
+ public void write(int value)
+ throws IOException
+ {
+ writeByte(value);
+ }
+
+ @Override
+ public void writeByte(int value)
+ throws IOException
+ {
+ ensureWritableBytes(SIZE_OF_BYTE);
+ slice.setByte(bufferPosition, value);
+ bufferPosition += SIZE_OF_BYTE;
+ }
+
+ @Override
+ public void writeShort(int value)
+ throws IOException
+ {
+ ensureWritableBytes(SIZE_OF_SHORT);
+ slice.setShort(bufferPosition, value);
+ bufferPosition += SIZE_OF_SHORT;
+ }
+
+ @Override
+ public void writeInt(int value)
+ throws IOException
+ {
+ ensureWritableBytes(SIZE_OF_INT);
+ slice.setInt(bufferPosition, value);
+ bufferPosition += SIZE_OF_INT;
+ }
+
+ @Override
+ public void writeLong(long value)
+ throws IOException
+ {
+ ensureWritableBytes(SIZE_OF_LONG);
+ slice.setLong(bufferPosition, value);
+ bufferPosition += SIZE_OF_LONG;
+ }
+
+ @Override
+ public void writeFloat(float value)
+ throws IOException
+ {
+ writeInt(Float.floatToIntBits(value));
+ }
+
+ @Override
+ public void writeDouble(double value)
+ throws IOException
+ {
+ writeLong(Double.doubleToLongBits(value));
+ }
+
+ public void write(Slice source)
+ throws IOException
+ {
+ write(source, 0, source.length());
+ }
+
+ public void write(Slice source, int sourceIndex, int length)
+ throws IOException
+ {
+ // Write huge chunks direct to OutputStream
+ if (length >= MINIMUM_CHUNK_SIZE) {
+ flushBufferToOutputStream();
+ writeToOutputStream(source, sourceIndex, length);
+ bufferOffset += length;
+ }
+ else {
+ ensureWritableBytes(length);
+ slice.setBytes(bufferPosition, source, sourceIndex, length);
+ bufferPosition += length;
+ }
+ }
+
+ @Override
+ public void write(byte[] source)
+ throws IOException
+ {
+ write(source, 0, source.length);
+ }
+
+ @Override
+ public void write(byte[] source, int sourceIndex, int length)
+ throws IOException
+ {
+ // Write huge chunks direct to OutputStream
+ if (length >= MINIMUM_CHUNK_SIZE) {
+ flushBufferToOutputStream();
+ writeToOutputStream(source, sourceIndex, length);
+ bufferOffset += length;
+ }
+ else {
+ ensureWritableBytes(length);
+ slice.setBytes(bufferPosition, source, sourceIndex, length);
+ bufferPosition += length;
+ }
+ }
+
+ public void write(InputStream in, int length)
+ throws IOException
+ {
+ while (length > 0) {
+ int batch = ensureBatchSize(length);
+ slice.setBytes(bufferPosition, in, batch);
+ bufferPosition += batch;
+ length -= batch;
+ }
+ }
+
+ public void writeZero(int length)
+ throws IOException
+ {
+ checkArgument(length >= 0, "length must be greater than or equal to 0");
+
+ while (length > 0) {
+ int batch = ensureBatchSize(length);
+ Arrays.fill(buffer, bufferPosition, bufferPosition + batch, (byte) 0);
+ bufferPosition += batch;
+ length -= batch;
+ }
+ }
+
+ @Override
+ public String toString()
+ {
+ StringBuilder builder = new StringBuilder("DataOutputStream{");
+ builder.append("outputStream=").append(outputStream);
+ builder.append(", bufferSize=").append(slice.length());
+ builder.append('}');
+ return builder.toString();
+ }
+
+ private void ensureWritableBytes(int minWritableBytes)
+ throws IOException
+ {
+ if (bufferPosition + minWritableBytes > slice.length()) {
+ flushBufferToOutputStream();
+ }
+ }
+
+ private int ensureBatchSize(int length)
+ throws IOException
+ {
+ ensureWritableBytes(Math.min(MINIMUM_CHUNK_SIZE, length));
+ return Math.min(length, slice.length() - bufferPosition);
+ }
+
+ private void flushBufferToOutputStream()
+ throws IOException
+ {
+ writeToOutputStream(buffer, 0, bufferPosition);
+ bufferOffset += bufferPosition;
+ bufferPosition = 0;
+ }
+
+ private void writeToOutputStream(byte[] source, int sourceIndex, int length)
+ throws IOException
+ {
+ outputStream.write(source, sourceIndex, length);
+ }
+
+ private void writeToOutputStream(Slice source, int sourceIndex, int length)
+ throws IOException
+ {
+ source.getBytes(sourceIndex, outputStream, length);
+ }
+
+ //
+ // Unsupported operations
+ //
+
+ /**
+ * Unsupported operation
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ @Deprecated
+ public void writeChar(int value)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Unsupported operation
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ @Deprecated
+ public void writeChars(String s)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Unsupported operation
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ @Deprecated
+ public void writeUTF(String s)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Unsupported operation
+ *
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ @Deprecated
+ public void writeBytes(String s)
+ {
+ throw new UnsupportedOperationException();
+ }
+}
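
A usage sketch of the buffering behavior: writes smaller than MINIMUM_CHUNK_SIZE (1024 bytes) accumulate in the internal buffer, larger writes flush the buffer and go straight to the underlying stream, and longSize() counts both.

import io.trino.hive.formats.DataOutputStream;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

class DataOutputStreamExample
{
    static void example()
            throws IOException
    {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(sink)) {
            out.writeInt(42);          // buffered: 4 bytes
            out.writeLong(123L);       // buffered: 8 more bytes
            out.write(new byte[4096]); // >= 1024 bytes: buffer is flushed, chunk written directly
            long written = out.longSize(); // 4 + 8 + 4096 = 4108
        }
        // close() flushes whatever remains in the buffer before closing sink
    }
}
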
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java
new file mode 100644
index 000000000000..2ec6d94e0a9f
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java
@@ -0,0 +1,457 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import org.apache.iceberg.io.SeekableInputStream;
+import org.openjdk.jol.info.ClassLayout;
+
+import java.io.DataInput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Verify.verify;
+import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
+import static io.airlift.slice.SizeOf.SIZE_OF_INT;
+import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
+import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
+import static io.airlift.slice.SizeOf.sizeOf;
+import static java.lang.Math.toIntExact;
+import static java.util.Objects.requireNonNull;
+
+public final class DataSeekableInputStream
+ extends InputStream
+ implements DataInput
+{
+ private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(DataSeekableInputStream.class).instanceSize());
+ private static final int DEFAULT_BUFFER_SIZE = 4 * 1024;
+ private static final int MINIMUM_CHUNK_SIZE = 1024;
+
+ private final SeekableInputStream inputStream;
+ private long readTimeNanos;
+ private long readBytes;
+
+ private final byte[] buffer;
+ private final Slice slice;
+ /**
+ * Offset of buffer within stream.
+ */
+ private long bufferOffset;
+ /**
+ * Current position for reading from buffer.
+ */
+ private int bufferPosition;
+
+ private int bufferFill;
+
+ public DataSeekableInputStream(SeekableInputStream inputStream)
+ {
+ this(inputStream, DEFAULT_BUFFER_SIZE);
+ }
+
+ public DataSeekableInputStream(SeekableInputStream inputStream, int bufferSize)
+ {
+ requireNonNull(inputStream, "inputStream is null");
+ checkArgument(bufferSize >= MINIMUM_CHUNK_SIZE, "minimum buffer size of " + MINIMUM_CHUNK_SIZE + " required");
+
+ this.inputStream = inputStream;
+ this.buffer = new byte[bufferSize];
+ this.slice = Slices.wrappedBuffer(buffer);
+ }
+
+ public long getReadTimeNanos()
+ {
+ return readTimeNanos;
+ }
+
+ public long getReadBytes()
+ {
+ return readBytes;
+ }
+
+ public long getPos()
+ throws IOException
+ {
+ return checkedCast(bufferOffset + bufferPosition);
+ }
+
+ public void seek(long newPos)
+ throws IOException
+ {
+ // todo check if new position is within the current buffer
+
+ // drop current buffer
+ bufferPosition = 0;
+ bufferFill = 0;
+
+ // skip the rest in inputStream
+ inputStream.seek(newPos);
+
+ // update buffer offset to the new position
+ bufferOffset = newPos;
+
+ verify(newPos == getPos());
+ }
+
+ @Override
+ public int available()
+ throws IOException
+ {
+ if (bufferPosition < bufferFill) {
+ return availableBytes();
+ }
+
+ return fillBuffer();
+ }
+
+ @Override
+ public int skipBytes(int n)
+ throws IOException
+ {
+ return (int) skip(n);
+ }
+
+ @Override
+ public boolean readBoolean()
+ throws IOException
+ {
+ return readByte() != 0;
+ }
+
+ @Override
+ public byte readByte()
+ throws IOException
+ {
+ ensureAvailable(SIZE_OF_BYTE);
+ byte v = slice.getByte(bufferPosition);
+ bufferPosition += SIZE_OF_BYTE;
+ return v;
+ }
+
+ @Override
+ public int readUnsignedByte()
+ throws IOException
+ {
+ return readByte() & 0xFF;
+ }
+
+ @Override
+ public short readShort()
+ throws IOException
+ {
+ ensureAvailable(SIZE_OF_SHORT);
+ short v = slice.getShort(bufferPosition);
+ bufferPosition += SIZE_OF_SHORT;
+ return v;
+ }
+
+ @Override
+ public int readUnsignedShort()
+ throws IOException
+ {
+ return readShort() & 0xFFFF;
+ }
+
+ @Override
+ public int readInt()
+ throws IOException
+ {
+ ensureAvailable(SIZE_OF_INT);
+ int v = slice.getInt(bufferPosition);
+ bufferPosition += SIZE_OF_INT;
+ return v;
+ }
+
+ /**
+ * Gets an unsigned 32-bit integer at the current {@code position}
+ * and increases the {@code position} by {@code 4} in this buffer.
+ *
+ * @throws EOFException if {@code this.available()} is less than {@code 4}
+ */
+ public long readUnsignedInt()
+ throws IOException
+ {
+ return readInt() & 0xFFFFFFFFL;
+ }
+
+ @Override
+ public long readLong()
+ throws IOException
+ {
+ ensureAvailable(SIZE_OF_LONG);
+ long v = slice.getLong(bufferPosition);
+ bufferPosition += SIZE_OF_LONG;
+ return v;
+ }
+
+ @Override
+ public float readFloat()
+ throws IOException
+ {
+ return Float.intBitsToFloat(readInt());
+ }
+
+ @Override
+ public double readDouble()
+ throws IOException
+ {
+ return Double.longBitsToDouble(readLong());
+ }
+
+ @Override
+ public int read()
+ throws IOException
+ {
+ if (available() == 0) {
+ return -1;
+ }
+
+ verify(availableBytes() > 0);
+ int v = slice.getByte(bufferPosition) & 0xFF;
+ bufferPosition += SIZE_OF_BYTE;
+ return v;
+ }
+
+ @Override
+ public long skip(long length)
+ throws IOException
+ {
+ int availableBytes = availableBytes();
+ // is skip within the current buffer?
+ if (availableBytes >= length) {
+ bufferPosition += length;
+ return length;
+ }
+
+ // drop current buffer
+ bufferPosition = bufferFill;
+
+ // skip the rest in inputStream
+ long start = System.nanoTime();
+ long inputStreamSkip = inputStream.skip(length - availableBytes);
+ readTimeNanos += System.nanoTime() - start;
+ readBytes += inputStreamSkip;
+
+ bufferOffset += inputStreamSkip;
+ return availableBytes + inputStreamSkip;
+ }
+
+ @Override
+ public int read(byte[] destination)
+ throws IOException
+ {
+ return read(destination, 0, destination.length);
+ }
+
+ @Override
+ public int read(byte[] destination, int destinationIndex, int length)
+ throws IOException
+ {
+ if (available() == 0) {
+ return -1;
+ }
+
+ verify(availableBytes() > 0);
+ int batch = Math.min(availableBytes(), length);
+ slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+ bufferPosition += batch;
+ return batch;
+ }
+
+ @Override
+ public void readFully(byte[] destination)
+ throws IOException
+ {
+ readFully(destination, 0, destination.length);
+ }
+
+ @Override
+ public void readFully(byte[] destination, int destinationIndex, int length)
+ throws IOException
+ {
+ while (length > 0) {
+ int batch = Math.min(availableBytes(), length);
+ slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+
+ bufferPosition += batch;
+ destinationIndex += batch;
+ length -= batch;
+
+ ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+ }
+ }
+
+ public Slice readSlice(int length)
+ throws IOException
+ {
+ if (length == 0) {
+ return Slices.EMPTY_SLICE;
+ }
+
+ Slice newSlice = Slices.allocate(length);
+ readFully(newSlice, 0, length);
+ return newSlice;
+ }
+
+ public void readFully(Slice destination)
+ throws IOException
+ {
+ readFully(destination, 0, destination.length());
+ }
+
+ public void readFully(Slice destination, int destinationIndex, int length)
+ throws IOException
+ {
+ while (length > 0) {
+ int batch = Math.min(availableBytes(), length);
+ slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+
+ bufferPosition += batch;
+ destinationIndex += batch;
+ length -= batch;
+
+ ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+ }
+ }
+
+ public void readFully(OutputStream out, int length)
+ throws IOException
+ {
+ while (length > 0) {
+ int batch = Math.min(availableBytes(), length);
+ out.write(buffer, bufferPosition, batch);
+
+ bufferPosition += batch;
+ length -= batch;
+
+ ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+ }
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ inputStream.close();
+ }
+
+ public long getRetainedSize()
+ {
+ return INSTANCE_SIZE + sizeOf(buffer);
+ }
+
+ private int availableBytes()
+ {
+ return bufferFill - bufferPosition;
+ }
+
+ private void ensureAvailable(int size)
+ throws IOException
+ {
+ if (bufferPosition + size < bufferFill) {
+ return;
+ }
+
+ if (fillBuffer() < size) {
+ throw new EOFException("End of stream");
+ }
+ }
+
+ private int fillBuffer()
+ throws IOException
+ {
+ // Keep the rest
+ int rest = bufferFill - bufferPosition;
+ // Use System.arraycopy for small copies
+ System.arraycopy(buffer, bufferPosition, buffer, 0, rest);
+
+ bufferFill = rest;
+ bufferOffset += bufferPosition;
+ bufferPosition = 0;
+ // Fill buffer with a minimum of bytes
+ long start = System.nanoTime();
+ while (bufferFill < MINIMUM_CHUNK_SIZE) {
+ int bytesRead = inputStream.read(buffer, bufferFill, buffer.length - bufferFill);
+ if (bytesRead < 0) {
+ break;
+ }
+
+ readBytes += bytesRead;
+ bufferFill += bytesRead;
+ }
+ readTimeNanos += System.nanoTime() - start;
+
+ return bufferFill;
+ }
+
+ private static int checkedCast(long value)
+ {
+ int result = (int) value;
+ checkArgument(result == value, "Size is greater than maximum int value");
+ return result;
+ }
+
+ //
+ // Unsupported operations
+ //
+
+ @Override
+ @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod")
+ @Deprecated
+ public void mark(int readLimit)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod")
+ @Deprecated
+ public void reset()
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ @Deprecated
+ public boolean markSupported()
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ @Deprecated
+ public char readChar()
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ @Deprecated
+ public String readLine()
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ @Deprecated
+ public String readUTF()
+ {
+ throw new UnsupportedOperationException();
+ }
+}
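
On the read side, a sketch pairing the buffered wrapper with a seekable source; getReadBytes() and getReadTimeNanos() carry the I/O accounting that previously lived on RcFileDataSource:

import io.airlift.slice.Slices;
import io.trino.filesystem.memory.MemorySeekableInputStream;
import io.trino.hive.formats.DataSeekableInputStream;

import java.io.IOException;

class DataSeekableInputStreamExample
{
    static void example()
            throws IOException
    {
        byte[] data = new byte[8192];
        DataSeekableInputStream in = new DataSeekableInputStream(
                new MemorySeekableInputStream(Slices.wrappedBuffer(data)));

        in.seek(4096);              // drops the internal buffer and repositions the source
        long value = in.readLong(); // refills the buffer, then decodes 8 bytes
        long fromSource = in.getReadBytes(); // bytes actually pulled from the source (4096 here)
    }
}
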
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java
similarity index 69%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java
index ca496ad6d563..7d9a73619a49 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java
@@ -11,12 +11,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceInput;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
+import io.trino.filesystem.TrinoInput;
+import io.trino.filesystem.TrinoInputFile;
import io.trino.spi.type.CharType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
@@ -32,14 +34,12 @@
import static java.util.Objects.requireNonNull;
// faster versions of org.apache.hadoop.io.WritableUtils methods adapted for Slice
-public final class RcFileDecoderUtils
+public final class ReadWriteUtils
{
// 0xFFFF_FFFF + syncFirst(long) + syncSecond(long)
private static final int SYNC_SEQUENCE_LENGTH = SIZE_OF_INT + SIZE_OF_LONG + SIZE_OF_LONG;
- private RcFileDecoderUtils()
- {
- }
+ private ReadWriteUtils() {}
public static int decodeVIntSize(Slice slice, int offset)
{
@@ -67,6 +67,23 @@ public static boolean isNegativeVInt(byte value)
return value < -120 || (value >= -112 && value < 0);
}
+ public static long readVInt(DataSeekableInputStream in)
+ throws IOException
+ {
+ byte firstByte = in.readByte();
+ int length = decodeVIntSize(firstByte);
+ if (length == 1) {
+ return firstByte;
+ }
+
+ long value = 0;
+ for (int i = 1; i < length; i++) {
+ value <<= 8;
+ value |= (in.readByte() & 0xFF);
+ }
+ return isNegativeVInt(firstByte) ? ~value : value;
+ }
+
public static long readVInt(SliceInput in)
{
byte firstByte = in.readByte();
@@ -116,13 +133,13 @@ private static long readVIntInternal(Slice slice, int start, int length)
/**
* Find the beginning of the first full sync sequence that starts within the specified range.
*/
- public static long findFirstSyncPosition(RcFileDataSource dataSource, long offset, long length, long syncFirst, long syncSecond)
+ public static long findFirstSyncPosition(TrinoInputFile inputFile, long offset, long length, long syncFirst, long syncSecond)
throws IOException
{
- requireNonNull(dataSource, "dataSource is null");
+ requireNonNull(inputFile, "inputFile is null");
checkArgument(offset >= 0, "offset is negative");
checkArgument(length >= 1, "length must be at least 1");
- checkArgument(offset + length <= dataSource.getSize(), "offset plus length is greater than data size");
+ checkArgument(offset + length <= inputFile.length(), "offset plus length is greater than data size");
// The full sync sequence is "0xFFFFFFFF syncFirst syncSecond". If
// this sequence begins the file range, the start position is returned
@@ -138,36 +155,69 @@ public static long findFirstSyncPosition(RcFileDataSource dataSource, long offse
// this causes a re-read of SYNC_SEQUENCE_LENGTH bytes each time, but is much simpler code
byte[] buffer = new byte[toIntExact(min(1 << 22, length + (SYNC_SEQUENCE_LENGTH - 1)))];
Slice bufferSlice = Slices.wrappedBuffer(buffer);
- for (long position = 0; position < length; position += bufferSlice.length() - (SYNC_SEQUENCE_LENGTH - 1)) {
- // either fill the buffer entirely, or read enough to allow all bytes in offset + length to be a start sequence
- int bufferSize = toIntExact(min(buffer.length, length + (SYNC_SEQUENCE_LENGTH - 1) - position));
- // don't read off the end of the file
- bufferSize = toIntExact(min(bufferSize, dataSource.getSize() - offset - position));
-
- dataSource.readFully(offset + position, buffer, 0, bufferSize);
-
- // find the starting index position of the sync sequence
- int index = bufferSlice.indexOf(sync);
- if (index >= 0) {
- // If the starting position is before the end of the search region, return the
- // absolute start position of the sequence.
- if (position + index < length) {
- long startOfSyncSequence = offset + position + index;
- return startOfSyncSequence;
+ try (TrinoInput input = inputFile.newInput()) {
+ for (long position = 0; position < length; position += bufferSlice.length() - (SYNC_SEQUENCE_LENGTH - 1)) {
+ // either fill the buffer entirely, or read enough to allow all bytes in offset + length to be a start sequence
+ int bufferSize = toIntExact(min(buffer.length, length + (SYNC_SEQUENCE_LENGTH - 1) - position));
+ // don't read off the end of the file
+ bufferSize = toIntExact(min(bufferSize, inputFile.length() - offset - position));
+
+ input.readFully(offset + position, buffer, 0, bufferSize);
+
+ // find the starting index position of the sync sequence
+ int index = bufferSlice.indexOf(sync);
+ if (index >= 0) {
+ // If the starting position is before the end of the search region, return the
+ // absolute start position of the sequence.
+ if (position + index < length) {
+ long startOfSyncSequence = offset + position + index;
+ return startOfSyncSequence;
+ }
+ // Otherwise, this is not a match for this region
+ // Note: this case isn't strictly needed as the loop will exit, but it is
+ // simpler to explicitly call it out.
+ return -1;
}
- // Otherwise, this is not a match for this region
- // Note: this case isn't strictly needed as the loop will exit, but it is
- // simpler to explicitly call it out.
- return -1;
}
}
return -1;
}
- public static void writeLengthPrefixedString(SliceOutput out, Slice slice)
+ public static void writeLengthPrefixedString(DataOutputStream out, Slice slice)
+ throws IOException
{
writeVInt(out, slice.length());
- out.writeBytes(slice);
+ out.write(slice);
+ }
+
+ public static void writeVInt(DataOutputStream out, int value)
+ throws IOException
+ {
+ if (value >= -112 && value <= 127) {
+ out.writeByte(value);
+ return;
+ }
+
+ int length = -112;
+ if (value < 0) {
+ value ^= -1; // take one's complement
+ length = -120;
+ }
+
+ int tmp = value;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ length--;
+ }
+
+ out.writeByte(length);
+
+ length = (length < -120) ? -(length + 120) : -(length + 112);
+
+ for (int idx = length; idx != 0; idx--) {
+ int shiftBits = (idx - 1) * 8;
+ out.writeByte((value >> shiftBits) & 0xFF);
+ }
}
public static void writeVInt(SliceOutput out, int value)
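
To make the vint layout concrete: following the Hadoop WritableUtils scheme these methods reimplement, values in [-112, 127] occupy a single byte; anything else gets a marker byte encoding the sign and payload length, followed by big-endian magnitude bytes. A round-trip sketch, assuming the readVInt(Slice, int) overload carried over from RcFileDecoderUtils:

import io.airlift.slice.DynamicSliceOutput;
import io.trino.hive.formats.ReadWriteUtils;

class VIntExample
{
    static void example()
    {
        // 300 is outside [-112, 127], so it takes a marker byte plus two payload bytes
        DynamicSliceOutput out = new DynamicSliceOutput(16);
        ReadWriteUtils.writeVInt(out, 300);
        // out.slice() now holds {-114, 0x01, 0x2C}:
        // -114 means "positive, 2 payload bytes"; 0x012C is 300 big-endian
        long value = ReadWriteUtils.readVInt(out.slice(), 0); // 300
    }
}
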
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java
new file mode 100644
index 000000000000..a5b4d3a46a16
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.compression;
+
+import io.airlift.slice.DynamicSliceOutput;
+import io.airlift.slice.Slice;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class AircompressorCodec
+ implements Codec
+{
+ // aircompressor codecs are assumed to not retain memory and to not require pooling
+ private final CompressionCodec codec;
+
+ public AircompressorCodec(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ }
+
+ @Override
+ public OutputStream createStreamCompressor(OutputStream outputStream)
+ throws IOException
+ {
+ return codec.createOutputStream(outputStream);
+ }
+
+ @Override
+ public ValueCompressor createValueCompressor()
+ {
+ return new AircompressorValueCompressor(codec);
+ }
+
+ private static class AircompressorValueCompressor
+ implements ValueCompressor
+ {
+ private final CompressionCodec codec;
+ private final DynamicSliceOutput buffer;
+
+ private AircompressorValueCompressor(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ this.buffer = new DynamicSliceOutput(1024);
+ }
+
+ @Override
+ public Slice compress(Slice slice)
+ throws IOException
+ {
+ buffer.reset();
+ try (CompressionOutputStream compressionStream = codec.createOutputStream(buffer, codec.createCompressor())) {
+ slice.getInput().transferTo(compressionStream);
+ }
+ return buffer.slice();
+ }
+ }
+
+ @Override
+ public MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize)
+ {
+ return new AircompressorCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get();
+ }
+
+ // this can be dramatically simplified when actual hadoop codecs are dropped
+ private static class AircompressorCompressedSliceOutputSupplier
+ implements Supplier<MemoryCompressedSliceOutput>
+ {
+ private final CompressionCodec codec;
+ private final ChunkedSliceOutput compressedOutput;
+
+ public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ this.compressedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
+ }
+
+ @Override
+ public MemoryCompressedSliceOutput get()
+ {
+ try {
+ compressedOutput.reset();
+ CompressionOutputStream compressionStream = codec.createOutputStream(compressedOutput);
+ return new MemoryCompressedSliceOutput(compressionStream, compressedOutput, this, () -> {});
+ }
+ catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+ }
+
+ @Override
+ public InputStream createStreamDecompressor(InputStream inputStream)
+ throws IOException
+ {
+ return codec.createInputStream(inputStream);
+ }
+
+ @Override
+ public ValueDecompressor createValueDecompressor()
+ {
+ return new AircompressorValueDecompressor(codec);
+ }
+
+ private static class AircompressorValueDecompressor
+ implements ValueDecompressor
+ {
+ private final CompressionCodec codec;
+
+ private AircompressorValueDecompressor(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ }
+
+ @Override
+ public void decompress(Slice compressed, Slice uncompressed)
+ throws IOException
+ {
+ try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput())) {
+ uncompressed.setBytes(0, decompressorStream, uncompressed.length());
+ }
+ catch (IndexOutOfBoundsException | IOException e) {
+ throw new IOException("Compressed stream is truncated", e);
+ }
+ }
+ }
+}
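
A round-trip sketch with the aircompressor-backed codec: the ValueCompressor yields a compressed Slice, and the matching ValueDecompressor inflates it into a caller-allocated buffer, which must already have the uncompressed size:

import io.airlift.compress.snappy.SnappyCodec;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.hive.formats.compression.AircompressorCodec;
import io.trino.hive.formats.compression.Codec;

import java.io.IOException;

class CodecRoundTripExample
{
    static void example()
            throws IOException
    {
        Codec codec = new AircompressorCodec(new SnappyCodec());

        Slice original = Slices.utf8Slice("hello hello hello hello");
        Slice compressed = codec.createValueCompressor().compress(original);

        // the reader must know the uncompressed size up front and pre-allocate
        Slice uncompressed = Slices.allocate(original.length());
        codec.createValueDecompressor().decompress(compressed, uncompressed);
    }
}
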
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java
similarity index 99%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java
index 45df863814f9..0dcb3e009d7f 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.compression;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java
similarity index 99%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java
index ccebbb3070e2..740c223f9ffa 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.compression;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java
new file mode 100644
index 000000000000..9b55665110f9
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.compression;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public interface Codec
+{
+ OutputStream createStreamCompressor(OutputStream outputStream)
+ throws IOException;
+
+ ValueCompressor createValueCompressor();
+
+ MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize)
+ throws IOException;
+
+ InputStream createStreamDecompressor(InputStream inputStream)
+ throws IOException;
+
+ ValueDecompressor createValueDecompressor();
+}
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java
new file mode 100644
index 000000000000..f126e2e5edb7
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.compression;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import io.airlift.compress.gzip.JdkGzipCodec;
+import io.airlift.compress.lz4.Lz4Codec;
+import io.airlift.compress.lzo.LzoCodec;
+import io.airlift.compress.snappy.SnappyCodec;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
+import static java.util.Objects.requireNonNull;
+
+public enum CompressionKind
+{
+ SNAPPY(".snappy", "org.apache.hadoop.io.compress.SnappyCodec") {
+ @Override
+ public Codec createCodec()
+ {
+ return new AircompressorCodec(new SnappyCodec());
+ }
+ },
+ LZO(".lzo_deflate", "org.apache.hadoop.io.compress.LzoCodec", "com.hadoop.compression.lzo.LzoCodec") {
+ @Override
+ public Codec createCodec()
+ {
+ return new AircompressorCodec(new LzoCodec());
+ }
+ },
+ LZ4(".lz4", "org.apache.hadoop.io.compress.Lz4Codec") {
+ @Override
+ public Codec createCodec()
+ {
+ return new AircompressorCodec(new Lz4Codec());
+ }
+ },
+ GZIP(".gz", "org.apache.hadoop.io.compress.GzipCodec") {
+ @Override
+ public Codec createCodec()
+ {
+ return new AircompressorCodec(new JdkGzipCodec());
+ }
+ },
+ ZSTD(".zst", "org.apache.hadoop.io.compress.ZStandardCodec") {
+ @Override
+ public Codec createCodec()
+ {
+ org.apache.hadoop.io.compress.ZStandardCodec codec = new org.apache.hadoop.io.compress.ZStandardCodec();
+ codec.setConf(newEmptyConfiguration());
+ return new HadoopCodec(codec);
+ }
+ },
+ BZIP2(".bz2", "org.apache.hadoop.io.compress.BZip2Codec") {
+ @Override
+ public Codec createCodec()
+ {
+ org.apache.hadoop.io.compress.BZip2Codec codec = new org.apache.hadoop.io.compress.BZip2Codec();
+ codec.setConf(newEmptyConfiguration());
+ return new HadoopCodec(codec);
+ }
+ };
+
+ private final List<String> hadoopClassNames;
+ private final String fileExtension;
+
+ CompressionKind(String fileExtension, String... hadoopClassNames)
+ {
+ this.hadoopClassNames = ImmutableList.copyOf(hadoopClassNames);
+ this.fileExtension = requireNonNull(fileExtension, "fileExtension is null");
+ }
+
+ public String getHadoopClassName()
+ {
+ return hadoopClassNames.get(0);
+ }
+
+ public String getFileExtension()
+ {
+ return fileExtension;
+ }
+
+ public abstract Codec createCodec();
+
+ private static final Map<String, CompressionKind> CODECS_BY_HADOOP_CLASS_NAME;
+
+ static {
+ ImmutableMap.Builder<String, CompressionKind> builder = ImmutableMap.builder();
+ for (CompressionKind codec : values()) {
+ for (String hadoopClassName : codec.hadoopClassNames) {
+ builder.put(hadoopClassName, codec);
+ }
+ }
+ CODECS_BY_HADOOP_CLASS_NAME = builder.buildOrThrow();
+ }
+
+ public static CompressionKind fromHadoopClassName(String hadoopClassName)
+ {
+ return Optional.ofNullable(CODECS_BY_HADOOP_CLASS_NAME.get(hadoopClassName))
+ .orElseThrow(() -> new IllegalArgumentException("Unknown codec: " + hadoopClassName));
+ }
+
+ public static Codec createCodecFromHadoopClassName(String hadoopClassName)
+ {
+ return Optional.ofNullable(CODECS_BY_HADOOP_CLASS_NAME.get(hadoopClassName))
+ .orElseThrow(() -> new IllegalArgumentException("Unknown codec: " + hadoopClassName))
+ .createCodec();
+ }
+
+ private static final Map<String, CompressionKind> CODECS_BY_FILE_EXTENSION = Arrays.stream(values())
+ .filter(codec -> codec.fileExtension != null)
+ .collect(toImmutableMap(codec -> codec.fileExtension, Function.identity()));
+
+ public static Optional<Codec> createCodecFromExtension(String extension)
+ {
+ return Optional.ofNullable(CODECS_BY_FILE_EXTENSION.get(extension))
+ .map(CompressionKind::createCodec);
+ }
+}
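
A lookup sketch: the enum resolves codecs both from Hadoop class names (as stored in Hive table and file metadata) and from file extensions (as seen on raw text files):

import io.trino.hive.formats.compression.Codec;
import io.trino.hive.formats.compression.CompressionKind;

import java.util.Optional;

class CompressionKindExample
{
    static void example()
    {
        // by Hadoop codec class name, e.g. from table properties
        Codec gzip = CompressionKind.createCodecFromHadoopClassName("org.apache.hadoop.io.compress.GzipCodec");

        // by file extension; empty if the extension is unknown
        Optional<Codec> snappy = CompressionKind.createCodecFromExtension(".snappy");

        // either of LZO's two historical class names maps to the same kind
        CompressionKind lzo = CompressionKind.fromHadoopClassName("com.hadoop.compression.lzo.LzoCodec");
    }
}
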
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java
new file mode 100644
index 000000000000..91afbae054eb
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.compression;
+
+import io.airlift.slice.DynamicSliceOutput;
+import io.airlift.slice.Slice;
+import org.apache.hadoop.io.compress.CodecPool;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.Compressor;
+import org.apache.hadoop.io.compress.Decompressor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.util.function.Supplier;
+
+import static com.google.common.base.Preconditions.checkState;
+import static java.util.Objects.requireNonNull;
+
+public class HadoopCodec
+ implements Codec
+{
+ private final CompressionCodec codec;
+
+ public HadoopCodec(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ }
+
+ @Override
+ public OutputStream createStreamCompressor(OutputStream outputStream)
+ throws IOException
+ {
+ return codec.createOutputStream(outputStream);
+ }
+
+ @Override
+ public ValueCompressor createValueCompressor()
+ {
+ return new HadoopValueCompressor(codec);
+ }
+
+ private static class HadoopValueCompressor
+ implements ValueCompressor
+ {
+ private final CompressionCodec codec;
+ private final Compressor compressor;
+ private final DynamicSliceOutput buffer;
+
+ private HadoopValueCompressor(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ this.compressor = CodecPool.getCompressor(codec);
+ this.buffer = new DynamicSliceOutput(1024);
+ }
+
+ @Override
+ public Slice compress(Slice slice)
+ throws IOException
+ {
+ compressor.reset();
+ buffer.reset();
+ try (CompressionOutputStream compressionStream = codec.createOutputStream(buffer, compressor)) {
+ slice.getInput().transferTo(compressionStream);
+ }
+ return buffer.slice();
+ }
+
+ @Override
+ public void close()
+ {
+ CodecPool.returnCompressor(compressor);
+ }
+ }
+
+ @Override
+ public MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize)
+ {
+ return new HadoopCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get();
+ }
+
+ private static class HadoopCompressedSliceOutputSupplier
+ implements Supplier<MemoryCompressedSliceOutput>
+ {
+ private final CompressionCodec codec;
+ private final Compressor compressor;
+ private final ChunkedSliceOutput bufferedOutput;
+
+ public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ this.compressor = CodecPool.getCompressor(codec);
+ this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
+ }
+
+ @Override
+ public MemoryCompressedSliceOutput get()
+ {
+ try {
+ compressor.reset();
+ bufferedOutput.reset();
+ CompressionOutputStream compressionStream = codec.createOutputStream(bufferedOutput, compressor);
+ return new MemoryCompressedSliceOutput(compressionStream, bufferedOutput, this, () -> CodecPool.returnCompressor(compressor));
+ }
+ catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+ }
+
+ @Override
+ public InputStream createStreamDecompressor(InputStream inputStream)
+ throws IOException
+ {
+ return codec.createInputStream(inputStream);
+ }
+
+ @Override
+ public ValueDecompressor createValueDecompressor()
+ {
+ return new HadoopValueDecompressor(codec);
+ }
+
+ private static class HadoopValueDecompressor
+ implements ValueDecompressor
+ {
+ private final CompressionCodec codec;
+ private final Decompressor decompressor;
+ private boolean closed;
+
+ private HadoopValueDecompressor(CompressionCodec codec)
+ {
+ this.codec = requireNonNull(codec, "codec is null");
+ decompressor = CodecPool.getDecompressor(codec);
+ }
+
+ @Override
+ public void decompress(Slice compressed, Slice uncompressed)
+ throws IOException
+ {
+ checkState(!closed, "Value decompressor has been closed");
+ decompressor.reset();
+ try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput(), decompressor)) {
+ uncompressed.setBytes(0, decompressorStream, uncompressed.length());
+ }
+ catch (IndexOutOfBoundsException | IOException e) {
+ throw new IOException("Compressed stream is truncated", e);
+ }
+ }
+
+ @Override
+ public void close()
+ {
+ if (closed) {
+ return;
+ }
+ closed = true;
+ CodecPool.returnDecompressor(decompressor);
+ }
+ }
+}
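
Unlike the aircompressor path, the Hadoop-backed compressors and decompressors come from CodecPool, so callers must close() them to return the pooled instances. A sketch for ZSTD, one of the two kinds (with BZIP2) that use HadoopCodec here:

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.hive.formats.compression.CompressionKind;
import io.trino.hive.formats.compression.ValueDecompressor;

import java.io.IOException;

class HadoopCodecExample
{
    static void example(Slice compressed, int uncompressedSize)
            throws IOException
    {
        ValueDecompressor decompressor = CompressionKind.ZSTD.createCodec().createValueDecompressor();
        try {
            Slice uncompressed = Slices.allocate(uncompressedSize);
            decompressor.decompress(compressed, uncompressed);
        }
        finally {
            decompressor.close(); // returns the pooled Hadoop decompressor
        }
    }
}
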
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java
new file mode 100644
index 000000000000..04b5a636084f
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats.compression;
+
+import io.airlift.slice.Slice;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static com.google.common.base.Preconditions.checkState;
+import static java.util.Objects.requireNonNull;
+
+// This specialized SliceOutput has direct access to the buffered output slices,
+// so it can report buffer sizes and provide the final output. Additionally, a new
+// CompressedSliceOutput can be created that reuses the underlying output
+// buffer.
+public final class MemoryCompressedSliceOutput
+ extends BufferedOutputStreamSliceOutput
+{
+ private final ChunkedSliceOutput bufferedOutput;
+ private final Supplier<MemoryCompressedSliceOutput> resetFactory;
+ private final Runnable onDestroy;
+ private boolean closed;
+ private boolean destroyed;
+
+ /**
+ * @param compressionStream the compressed output stream to delegate to
+ * @param bufferedOutput the output for the compressionStream
+ * @param resetFactory the function to create a new MemoryCompressedSliceOutput that reuses the bufferedOutput
+ * @param onDestroy used to clean up compression resources when done
+ */
+ public MemoryCompressedSliceOutput(
+ OutputStream compressionStream,
+ ChunkedSliceOutput bufferedOutput,
+ Supplier<MemoryCompressedSliceOutput> resetFactory,
+ Runnable onDestroy)
+ {
+ super(compressionStream);
+ this.bufferedOutput = requireNonNull(bufferedOutput, "bufferedOutput is null");
+ this.resetFactory = requireNonNull(resetFactory, "resetFactory is null");
+ this.onDestroy = requireNonNull(onDestroy, "onDestroy is null");
+ }
+
+ @Override
+ public long getRetainedSize()
+ {
+ return super.getRetainedSize() + bufferedOutput.getRetainedSize();
+ }
+
+ public int getCompressedSize()
+ {
+ checkState(closed, "Stream has not been closed");
+ checkState(!destroyed, "Stream has been destroyed");
+ return bufferedOutput.size();
+ }
+
+ public List<Slice> getCompressedSlices()
+ {
+ checkState(closed, "Stream has not been closed");
+ checkState(!destroyed, "Stream has been destroyed");
+ return bufferedOutput.getSlices();
+ }
+
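+ // Intended call pattern, inferred from the state checks below: close() the stream, drain it
+ // with getCompressedSlices(), then call this method to obtain a fresh stream that reuses the
+ // same underlying buffer.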
+ public MemoryCompressedSliceOutput createRecycledCompressedSliceOutput()
+ {
+ checkState(closed, "Stream has not been closed");
+ checkState(!destroyed, "Stream has been destroyed");
+ destroyed = true;
+ return resetFactory.get();
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ if (!closed) {
+ closed = true;
+ super.close();
+ }
+ }
+
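+ // Unlike close(), destroy() also runs the onDestroy callback (e.g. returning a pooled
+ // compressor), after which the output can no longer be recycled.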
+ public void destroy()
+ throws IOException
+ {
+ if (!destroyed) {
+ destroyed = true;
+ try {
+ close();
+ }
+ finally {
+ onDestroy.run();
+ }
+ }
+ }
+
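+ // Uncompressed variant: the chunked buffer acts as both the "compression" stream and the
+ // backing output, so bytes are stored exactly as written.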
+ public static MemoryCompressedSliceOutput createUncompressedMemorySliceOutput(int minChunkSize, int maxChunkSize)
+ {
+ return new UncompressedSliceOutputSupplier(minChunkSize, maxChunkSize).get();
+ }
+
+ private static class UncompressedSliceOutputSupplier
+ implements Supplier<MemoryCompressedSliceOutput>
+ {
+ private final ChunkedSliceOutput chunkedSliceOutput;
+
+ private UncompressedSliceOutputSupplier(int minChunkSize, int maxChunkSize)
+ {
+ chunkedSliceOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
+ }
+
+ @Override
+ public MemoryCompressedSliceOutput get()
+ {
+ chunkedSliceOutput.reset();
+ return new MemoryCompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {});
+ }
+ }
+}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java
similarity index 73%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSource.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java
index 6da2c05d63b7..0dd99b5b677b 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSource.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java
@@ -11,22 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.compression;
+
+import io.airlift.slice.Slice;
import java.io.Closeable;
import java.io.IOException;
-public interface RcFileDataSource
+public interface ValueCompressor
extends Closeable
{
- RcFileDataSourceId getId();
-
- long getReadBytes();
-
- long getReadTimeNanos();
-
- long getSize();
-
- void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
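+ // compresses the whole input slice in one shot and returns the compressed bytes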
+ Slice compress(Slice slice)
throws IOException;
+
+ @Override
+ default void close() {}
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java
similarity index 73%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecompressor.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java
index 3391524c6958..3eaa64836a2e 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecompressor.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java
@@ -11,14 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.compression;
import io.airlift.slice.Slice;
-public interface RcFileDecompressor
+import java.io.Closeable;
+import java.io.IOException;
+
+public interface ValueDecompressor
+ extends Closeable
{
void decompress(Slice compressed, Slice uncompressed)
- throws RcFileCorruptionException;
+ throws IOException;
- void destroy();
+ @Override
+ default void close() {}
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnData.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnData.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java
index 123377ed9c2b..1abec9ddafd3 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnData.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import io.airlift.slice.Slice;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java
index a12846cea8d7..657b76ec9151 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import io.airlift.slice.SliceOutput;
import io.trino.spi.block.Block;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/EncodeOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java
similarity index 94%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/EncodeOutput.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java
index a90036988511..240a84b98247 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/EncodeOutput.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
public interface EncodeOutput
{
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/PageSplitterUtil.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java
similarity index 98%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/PageSplitterUtil.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java
index ec6cab630d33..db4419935c3f 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/PageSplitterUtil.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.ImmutableList;
import io.trino.spi.Page;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCorruptionException.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCorruptionException.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java
index c21512163e23..7720660b985c 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCorruptionException.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import java.io.IOException;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java
similarity index 99%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java
index 53f3382fffef..4f3ef608907e 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import io.trino.spi.TrinoException;
import io.trino.spi.type.ArrayType;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java
similarity index 77%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileReader.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java
index 7ab8849fb06b..bd4d2c9ec7fe 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileReader.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java
@@ -11,20 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.BasicSliceInput;
-import io.airlift.slice.ChunkedSliceInput;
-import io.airlift.slice.ChunkedSliceInput.BufferReference;
-import io.airlift.slice.ChunkedSliceInput.SliceLoader;
import io.airlift.slice.Slice;
-import io.airlift.slice.SliceInput;
import io.airlift.slice.Slices;
-import io.airlift.units.DataSize;
-import io.airlift.units.DataSize.Unit;
-import io.trino.rcfile.RcFileWriteValidation.WriteChecksum;
-import io.trino.rcfile.RcFileWriteValidation.WriteChecksumBuilder;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.hive.formats.DataSeekableInputStream;
+import io.trino.hive.formats.ReadWriteUtils;
+import io.trino.hive.formats.compression.CompressionKind;
+import io.trino.hive.formats.compression.ValueDecompressor;
+import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum;
+import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksumBuilder;
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.RunLengthEncodedBlock;
@@ -32,7 +31,6 @@
import java.io.Closeable;
import java.io.IOException;
-import java.io.UncheckedIOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -44,9 +42,6 @@
import static com.google.common.io.ByteStreams.skipFully;
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
-import static io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
@@ -72,14 +67,15 @@ public class RcFileReader
private static final String COLUMN_COUNT_METADATA_KEY = "hive.io.rcfile.column.number";
- private final RcFileDataSource dataSource;
+ private final String location;
+ private final long fileSize;
private final Map<Integer, Type> readColumns;
- private final ChunkedSliceInput input;
+ private final DataSeekableInputStream input;
private final long length;
private final byte version;
- private final RcFileDecompressor decompressor;
+ private final ValueDecompressor decompressor;
private final Map<String, String> metadata;
private final int columnCount;
@@ -106,49 +102,47 @@ public class RcFileReader
private final Optional<WriteChecksumBuilder> writeChecksumBuilder;
public RcFileReader(
- RcFileDataSource dataSource,
+ TrinoInputFile inputFile,
RcFileEncoding encoding,
Map<Integer, Type> readColumns,
- RcFileCodecFactory codecFactory,
long offset,
- long length,
- DataSize bufferSize)
+ long length)
throws IOException
{
- this(dataSource, encoding, readColumns, codecFactory, offset, length, bufferSize, Optional.empty());
+ this(inputFile, encoding, readColumns, offset, length, Optional.empty());
}
private RcFileReader(
- RcFileDataSource dataSource,
+ TrinoInputFile inputFile,
RcFileEncoding encoding,
Map<Integer, Type> readColumns,
- RcFileCodecFactory codecFactory,
long offset,
long length,
- DataSize bufferSize,
Optional<RcFileWriteValidation> writeValidation)
throws IOException
{
- this.dataSource = requireNonNull(dataSource, "dataSource is null");
+ requireNonNull(inputFile, "inputFile is null");
+ this.location = inputFile.location();
+ this.fileSize = inputFile.length();
this.readColumns = ImmutableMap.copyOf(requireNonNull(readColumns, "readColumns is null"));
- this.input = new ChunkedSliceInput(new DataSourceSliceLoader(dataSource), toIntExact(bufferSize.toBytes()));
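+ // read the file as a plain stream; the explicit read buffering previously provided by
+ // ChunkedSliceInput is now left to the file system's TrinoInput implementation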
+ this.input = new DataSeekableInputStream(inputFile.newInput().inputStream());
this.writeValidation = requireNonNull(writeValidation, "writeValidation is null");
- this.writeChecksumBuilder = writeValidation.map(validation -> createWriteChecksumBuilder(readColumns));
+ this.writeChecksumBuilder = writeValidation.map(validation -> WriteChecksumBuilder.createWriteChecksumBuilder(readColumns));
verify(offset >= 0, "offset is negative");
- verify(offset < dataSource.getSize(), "offset is greater than data size");
+ verify(offset < inputFile.length(), "offset is greater than data size");
verify(length >= 1, "length must be at least 1");
this.length = length;
this.end = offset + length;
- verify(end <= dataSource.getSize(), "offset plus length is greater than data size");
+ verify(end <= fileSize, "offset plus length is greater than data size");
// read header
Slice magic = input.readSlice(RCFILE_MAGIC.length());
boolean compressed;
if (RCFILE_MAGIC.equals(magic)) {
version = input.readByte();
- verify(version <= CURRENT_VERSION, "RCFile version %s not supported: %s", version, dataSource);
+ verify(version <= CURRENT_VERSION, "RCFile version %s not supported: %s", version, inputFile.location());
validateWrite(validation -> validation.getVersion() == version, "Unexpected file version");
compressed = input.readBoolean();
}
@@ -157,30 +151,30 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) {
// first version of RCFile used magic SEQ with version 6
byte sequenceFileVersion = input.readByte();
- verify(sequenceFileVersion == SEQUENCE_FILE_VERSION, "File %s is a SequenceFile not an RCFile", dataSource);
+ verify(sequenceFileVersion == SEQUENCE_FILE_VERSION, "File %s is a SequenceFile not an RCFile", inputFile.location());
// this is the first version of RCFile
this.version = FIRST_VERSION;
Slice keyClassName = readLengthPrefixedString(input);
Slice valueClassName = readLengthPrefixedString(input);
- verify(RCFILE_KEY_BUFFER_NAME.equals(keyClassName) && RCFILE_VALUE_BUFFER_NAME.equals(valueClassName), "File %s is a SequenceFile not an RCFile", dataSource);
+ verify(RCFILE_KEY_BUFFER_NAME.equals(keyClassName) && RCFILE_VALUE_BUFFER_NAME.equals(valueClassName), "File %s is a SequenceFile not an RCFile", inputFile.location());
compressed = input.readBoolean();
// RC file is never block compressed
if (input.readBoolean()) {
- throw corrupt("File %s is a SequenceFile not an RCFile", dataSource);
+ throw corrupt("File %s is a SequenceFile not an RCFile", inputFile.location());
}
}
else {
- throw corrupt("File %s is not an RCFile", dataSource);
+ throw corrupt("File %s is not an RCFile", inputFile.location());
}
// setup the compression codec
if (compressed) {
String codecClassName = readLengthPrefixedString(input).toStringUtf8();
validateWrite(validation -> validation.getCodecClassName().equals(Optional.of(codecClassName)), "Unexpected compression codec");
- this.decompressor = codecFactory.createDecompressor(codecClassName);
+ this.decompressor = CompressionKind.createCodecFromHadoopClassName(codecClassName).createValueDecompressor();
}
else {
validateWrite(validation -> validation.getCodecClassName().equals(Optional.empty()), "Expected file to be compressed");
@@ -189,8 +183,8 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) {
// read metadata
int metadataEntries = Integer.reverseBytes(input.readInt());
- verify(metadataEntries >= 0, "Invalid metadata entry count %s in RCFile %s", metadataEntries, dataSource);
- verify(metadataEntries <= MAX_METADATA_ENTRIES, "Too many metadata entries (%s) in RCFile %s", metadataEntries, dataSource);
+ verify(metadataEntries >= 0, "Invalid metadata entry count %s in RCFile %s", metadataEntries, inputFile.location());
+ verify(metadataEntries <= MAX_METADATA_ENTRIES, "Too many metadata entries (%s) in RCFile %s", metadataEntries, inputFile.location());
ImmutableMap.Builder<String, String> metadataBuilder = ImmutableMap.builder();
for (int i = 0; i < metadataEntries; i++) {
metadataBuilder.put(readLengthPrefixedString(input).toStringUtf8(), readLengthPrefixedString(input).toStringUtf8());
@@ -200,15 +194,16 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) {
// get column count from metadata
String columnCountString = metadata.get(COLUMN_COUNT_METADATA_KEY);
+ verify(columnCountString != null, "Column count not specified in metadata of RCFile %s", inputFile.location());
try {
columnCount = Integer.parseInt(columnCountString);
}
catch (NumberFormatException e) {
- throw corrupt("Invalid column count %s in RCFile %s", columnCountString, dataSource);
+ throw corrupt("Invalid column count %s in RCFile %s", columnCountString, inputFile.location());
}
// initialize columns
- verify(columnCount <= MAX_COLUMN_COUNT, "Too many columns (%s) in RCFile %s", columnCountString, dataSource);
+ verify(columnCount <= MAX_COLUMN_COUNT, "Too many columns (%s) in RCFile %s", columnCountString, inputFile.location());
columns = new Column[columnCount];
for (Entry<Integer, Type> entry : readColumns.entrySet()) {
if (entry.getKey() < columnCount) {
@@ -227,7 +222,12 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) {
// of the file. In that case, the reader owns all row groups up to the first sync point.
if (offset != 0) {
// if the specified file region does not contain the start of a sync sequence, this call will close the reader
- seekToFirstRowGroupInRange(offset, length);
+ long startOfSyncSequence = ReadWriteUtils.findFirstSyncPosition(inputFile, offset, length, syncFirst, syncSecond);
+ if (startOfSyncSequence < 0) {
+ closeQuietly();
+ return;
+ }
+ input.seek(startOfSyncSequence);
}
}
@@ -253,7 +253,7 @@ public long getLength()
public long getBytesRead()
{
- return dataSource.getReadBytes();
+ return input.getReadBytes();
}
public long getRowsRead()
@@ -263,7 +263,7 @@ public long getRowsRead()
public long getReadTimeNanos()
{
- return dataSource.getReadTimeNanos();
+ return input.getReadTimeNanos();
}
public Slice getSync()
@@ -290,7 +290,7 @@ public void close()
}
finally {
if (decompressor != null) {
- decompressor.destroy();
+ decompressor.close();
}
}
if (writeChecksumBuilder.isPresent()) {
@@ -322,18 +322,18 @@ public int advance()
}
// are we at the end?
- if (input.remaining() == 0) {
+ if (fileSize - input.getPos() == 0) {
close();
return -1;
}
// read uncompressed size of row group (which is useless information)
- verify(input.remaining() >= SIZE_OF_INT, "RCFile truncated %s", dataSource.getId());
+ verify(fileSize - input.getPos() >= SIZE_OF_INT, "RCFile truncated %s", location);
int unusedRowGroupSize = Integer.reverseBytes(input.readInt());
// read sequence sync if present
if (unusedRowGroupSize == -1) {
- verify(input.remaining() >= SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT, "RCFile truncated %s", dataSource.getId());
+ verify(fileSize - input.getPos() >= SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT, "RCFile truncated %s", location);
// The full sync sequence is "0xFFFFFFFF syncFirst syncSecond". If
// this sequence begins in our segment, we must continue processing until the
@@ -341,12 +341,12 @@ public int advance()
// We have already read the 0xFFFFFFFF above, so we must test the
// end condition back 4 bytes.
// NOTE: this decision must agree with RcFileDecoderUtils.findFirstSyncPosition
- if (input.position() - SIZE_OF_INT >= end) {
+ if (input.getPos() - SIZE_OF_INT >= end) {
close();
return -1;
}
- verify(syncFirst == input.readLong() && syncSecond == input.readLong(), "Invalid sync in RCFile %s", dataSource.getId());
+ verify(syncFirst == input.readLong() && syncSecond == input.readLong(), "Invalid sync in RCFile %s", location);
// read the useless uncompressed length
unusedRowGroupSize = Integer.reverseBytes(input.readInt());
@@ -362,7 +362,9 @@ else if (rowsRead > 0) {
if (compressedHeaderSize > compressedHeaderBuffer.length()) {
compressedHeaderBuffer = Slices.allocate(compressedHeaderSize);
}
- input.readBytes(compressedHeaderBuffer, 0, compressedHeaderSize);
+ // use an exact-sized compressed header slice to avoid problems where compression algorithms over-read
+ Slice compressedHeader = compressedHeaderBuffer.slice(0, compressedHeaderSize);
+ input.readFully(compressedHeader);
// decompress row group header
Slice header;
@@ -372,18 +374,18 @@ else if (rowsRead > 0) {
}
Slice buffer = headerBuffer.slice(0, uncompressedHeaderSize);
- decompressor.decompress(compressedHeaderBuffer, buffer);
+ decompressor.decompress(compressedHeader, buffer);
header = buffer;
}
else {
- verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", dataSource.getId());
- header = compressedHeaderBuffer;
+ verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", location);
+ header = compressedHeader;
}
BasicSliceInput headerInput = header.getInput();
// read number of rows in row group
- rowGroupRowCount = toIntExact(readVInt(headerInput));
+ rowGroupRowCount = toIntExact(ReadWriteUtils.readVInt(headerInput));
rowsRead += rowGroupRowCount;
rowGroupPosition = 0;
currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount);
@@ -391,14 +393,14 @@ else if (rowsRead > 0) {
// set column buffers
int totalCompressedDataSize = 0;
for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
- int compressedDataSize = toIntExact(readVInt(headerInput));
+ int compressedDataSize = toIntExact(ReadWriteUtils.readVInt(headerInput));
totalCompressedDataSize += compressedDataSize;
- int uncompressedDataSize = toIntExact(readVInt(headerInput));
+ int uncompressedDataSize = toIntExact(ReadWriteUtils.readVInt(headerInput));
if (decompressor == null && compressedDataSize != uncompressedDataSize) {
- throw corrupt("Invalid RCFile %s", dataSource.getId());
+ throw corrupt("Invalid RCFile %s", location);
}
- int lengthsSize = toIntExact(readVInt(headerInput));
+ int lengthsSize = toIntExact(ReadWriteUtils.readVInt(headerInput));
Slice lengthsBuffer = headerInput.readSlice(lengthsSize);
@@ -434,20 +436,9 @@ public Block readBlock(int columnIndex)
return columns[columnIndex].readBlock(rowGroupPosition, currentChunkRowCount);
}
- public RcFileDataSourceId getId()
- {
- return dataSource.getId();
- }
-
- private void seekToFirstRowGroupInRange(long offset, long length)
- throws IOException
+ public String getFileLocation()
{
- long startOfSyncSequence = findFirstSyncPosition(dataSource, offset, length, syncFirst, syncSecond);
- if (startOfSyncSequence < 0) {
- closeQuietly();
- return;
- }
- input.setPosition(startOfSyncSequence);
+ return location;
}
private void closeQuietly()
@@ -459,10 +450,10 @@ private void closeQuietly()
}
}
- private Slice readLengthPrefixedString(SliceInput in)
- throws RcFileCorruptionException
+ private Slice readLengthPrefixedString(DataSeekableInputStream in)
+ throws IOException
{
- int length = toIntExact(readVInt(in));
+ int length = toIntExact(ReadWriteUtils.readVInt(in));
verify(length <= MAX_METADATA_STRING_LENGTH, "Metadata string value is too long (%s) in RCFile %s", length, in);
return in.readSlice(length);
}
@@ -491,9 +482,7 @@ private void validateWrite(Predicate<RcFileWriteValidation> test, String message
private void validateWriteRowGroupChecksum()
{
- if (writeChecksumBuilder.isPresent()) {
- writeChecksumBuilder.get().addRowGroup(rowGroupRowCount);
- }
+ writeChecksumBuilder.ifPresent(checksumBuilder -> checksumBuilder.addRowGroup(rowGroupRowCount));
}
private void validateWritePageChecksum()
@@ -510,10 +499,9 @@ private void validateWritePageChecksum()
static void validateFile(
RcFileWriteValidation writeValidation,
- RcFileDataSource input,
+ TrinoInputFile inputFile,
RcFileEncoding encoding,
- List<Type> types,
- RcFileCodecFactory codecFactory)
+ List<Type> types)
throws RcFileCorruptionException
{
ImmutableMap.Builder<Integer, Type> readTypes = ImmutableMap.builder();
@@ -521,13 +509,11 @@ static void validateFile(
readTypes.put(columnIndex, types.get(columnIndex));
}
try (RcFileReader rcFileReader = new RcFileReader(
- input,
+ inputFile,
encoding,
readTypes.buildOrThrow(),
- codecFactory,
0,
- input.getSize(),
- DataSize.of(8, Unit.MEGABYTE),
+ inputFile.length(),
Optional.of(writeValidation))) {
while (rcFileReader.advance() >= 0) {
// ignored
@@ -544,7 +530,7 @@ static void validateFile(
private static class Column
{
private final ColumnEncoding encoding;
- private final RcFileDecompressor decompressor;
+ private final ValueDecompressor decompressor;
private BasicSliceInput lengthsInput;
private Slice dataBuffer;
@@ -560,7 +546,7 @@ private static class Column
private int runLength;
private int lastValueLength = -1;
- public Column(ColumnEncoding encoding, RcFileDecompressor decompressor)
+ public Column(ColumnEncoding encoding, ValueDecompressor decompressor)
{
this.encoding = encoding;
this.decompressor = decompressor;
@@ -630,7 +616,7 @@ private int readNextValueLength()
return lastValueLength;
}
- int valueLength = toIntExact(readVInt(lengthsInput));
+ int valueLength = toIntExact(ReadWriteUtils.readVInt(lengthsInput));
// negative length is used to encode a run or the last value
if (valueLength < 0) {
@@ -663,73 +649,4 @@ private Slice getDataBuffer()
return dataBuffer;
}
}
-
- private static class DataSourceSliceLoader
- implements SliceLoader<ByteArrayBufferReference>
- {
- private final RcFileDataSource dataSource;
-
- public DataSourceSliceLoader(RcFileDataSource dataSource)
- {
- this.dataSource = dataSource;
- }
-
- @Override
- public ByteArrayBufferReference createBuffer(int bufferSize)
- {
- return new ByteArrayBufferReference(bufferSize);
- }
-
- @Override
- public long getSize()
- {
- return dataSource.getSize();
- }
-
- @Override
- public void load(long position, ByteArrayBufferReference bufferReference, int length)
- {
- try {
- dataSource.readFully(position, bufferReference.getByteBuffer(), 0, length);
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-
- @Override
- public void close()
- {
- try {
- dataSource.close();
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
- }
-
- private static class ByteArrayBufferReference
- implements BufferReference
- {
- private final byte[] byteBuffer;
- private final Slice sliceBuffer;
-
- public ByteArrayBufferReference(int size)
- {
- byteBuffer = new byte[size];
- sliceBuffer = Slices.wrappedBuffer(byteBuffer);
- }
-
- public byte[] getByteBuffer()
- {
- return byteBuffer;
- }
-
- @Override
- public Slice getSlice()
- {
- return sliceBuffer;
- }
- }
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriteValidation.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java
similarity index 90%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriteValidation.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java
index 21a3ab3103b1..668c43efd4dd 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriteValidation.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -200,46 +200,39 @@ public RcFileWriteValidationBuilder(List<Type> types)
this.checksum = new WriteChecksumBuilder(types);
}
- public RcFileWriteValidationBuilder setVersion(byte version)
+ public void setVersion(byte version)
{
this.version = version;
- return this;
}
- public RcFileWriteValidationBuilder addMetadataProperty(String key, String value)
+ public void addMetadataProperty(String key, String value)
{
metadata.put(key, value);
- return this;
}
- public RcFileWriteValidationBuilder setCodecClassName(Optional<String> codecClassName)
+ public void setCodecClassName(Optional<String> codecClassName)
{
this.codecClassName = codecClassName;
- return this;
}
- public RcFileWriteValidationBuilder setSyncFirst(long syncFirst)
+ public void setSyncFirst(long syncFirst)
{
this.syncFirst = syncFirst;
- return this;
}
- public RcFileWriteValidationBuilder setSyncSecond(long syncSecond)
+ public void setSyncSecond(long syncSecond)
{
this.syncSecond = syncSecond;
- return this;
}
- public RcFileWriteValidationBuilder addRowGroup(int rowCount)
+ public void addRowGroup(int rowCount)
{
checksum.addRowGroup(rowCount);
- return this;
}
- public RcFileWriteValidationBuilder addPage(Page page)
+ public void addPage(Page page)
{
checksum.addPage(page);
- return this;
}
public RcFileWriteValidation build()
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java
similarity index 85%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriter.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java
index 572e483add83..d3369063358b 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriter.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java
@@ -11,15 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.io.Closer;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.units.DataSize;
-import io.trino.rcfile.RcFileCompressor.CompressedSliceOutput;
-import io.trino.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.hive.formats.DataOutputStream;
+import io.trino.hive.formats.compression.Codec;
+import io.trino.hive.formats.compression.CompressionKind;
+import io.trino.hive.formats.compression.MemoryCompressedSliceOutput;
+import io.trino.hive.formats.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder;
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.type.Type;
@@ -29,6 +33,7 @@
import java.io.Closeable;
import java.io.IOException;
+import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -41,10 +46,9 @@
import static io.airlift.slice.Slices.utf8Slice;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
-import static io.trino.rcfile.PageSplitterUtil.splitPage;
-import static io.trino.rcfile.RcFileDecoderUtils.writeLengthPrefixedString;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
-import static io.trino.rcfile.RcFileReader.validateFile;
+import static io.trino.hive.formats.ReadWriteUtils.writeLengthPrefixedString;
+import static io.trino.hive.formats.ReadWriteUtils.writeVInt;
+import static io.trino.hive.formats.rcfile.RcFileReader.validateFile;
import static java.lang.StrictMath.toIntExact;
import static java.util.Objects.requireNonNull;
@@ -68,15 +72,14 @@ public class RcFileWriter
PRESTO_RCFILE_WRITER_VERSION = version == null ? "UNKNOWN" : version;
}
- private final SliceOutput output;
+ private final DataOutputStream output;
private final List<Type> types;
private final RcFileEncoding encoding;
- private final RcFileCodecFactory codecFactory;
private final long syncFirst = ThreadLocalRandom.current().nextLong();
private final long syncSecond = ThreadLocalRandom.current().nextLong();
- private CompressedSliceOutput keySectionOutput;
+ private MemoryCompressedSliceOutput keySectionOutput;
private final ColumnEncoder[] columnEncoders;
private final int targetMinRowGroupSize;
@@ -91,21 +94,19 @@ public class RcFileWriter
private final RcFileWriteValidationBuilder validationBuilder;
public RcFileWriter(
- SliceOutput output,
+ OutputStream rawOutput,
List<Type> types,
RcFileEncoding encoding,
- Optional<String> codecName,
- RcFileCodecFactory codecFactory,
+ Optional<CompressionKind> compressionKind,
Map<String, String> metadata,
boolean validate)
throws IOException
{
this(
- output,
+ rawOutput,
types,
encoding,
- codecName,
- codecFactory,
+ compressionKind,
metadata,
DEFAULT_TARGET_MIN_ROW_GROUP_SIZE,
DEFAULT_TARGET_MAX_ROW_GROUP_SIZE,
@@ -113,23 +114,21 @@ public RcFileWriter(
}
public RcFileWriter(
- SliceOutput output,
+ OutputStream rawOutput,
List<Type> types,
RcFileEncoding encoding,
- Optional<String> codecName,
- RcFileCodecFactory codecFactory,
+ Optional<CompressionKind> compressionKind,
Map<String, String> metadata,
DataSize targetMinRowGroupSize,
DataSize targetMaxRowGroupSize,
boolean validate)
throws IOException
{
- requireNonNull(output, "output is null");
+ requireNonNull(rawOutput, "rawOutput is null");
requireNonNull(types, "types is null");
checkArgument(!types.isEmpty(), "types is empty");
requireNonNull(encoding, "encoding is null");
- requireNonNull(codecName, "codecName is null");
- requireNonNull(codecFactory, "codecFactory is null");
+ requireNonNull(compressionKind, "compressionKind is null");
requireNonNull(metadata, "metadata is null");
checkArgument(!metadata.containsKey(PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY), "Cannot set property %s", PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY);
checkArgument(!metadata.containsKey(COLUMN_COUNT_METADATA_KEY), "Cannot set property %s", COLUMN_COUNT_METADATA_KEY);
@@ -139,20 +138,21 @@ public RcFileWriter(
this.validationBuilder = validate ? new RcFileWriteValidationBuilder(types) : null;
- this.output = output;
+ this.output = new DataOutputStream(rawOutput);
this.types = types;
this.encoding = encoding;
- this.codecFactory = codecFactory;
// write header
- output.writeBytes(RCFILE_MAGIC);
+ output.write(RCFILE_MAGIC);
output.writeByte(CURRENT_VERSION);
recordValidation(validation -> validation.setVersion((byte) CURRENT_VERSION));
// write codec information
- output.writeBoolean(codecName.isPresent());
- codecName.ifPresent(name -> writeLengthPrefixedString(output, utf8Slice(name)));
- recordValidation(validation -> validation.setCodecClassName(codecName));
+ output.writeBoolean(compressionKind.isPresent());
+ if (compressionKind.isPresent()) {
+ writeLengthPrefixedString(output, utf8Slice(compressionKind.get().getHadoopClassName()));
+ }
+ recordValidation(validation -> validation.setCodecClassName(compressionKind.map(CompressionKind::getHadoopClassName)));
// write metadata
output.writeInt(Integer.reverseBytes(metadata.size() + 2));
@@ -169,20 +169,21 @@ public RcFileWriter(
recordValidation(validation -> validation.setSyncSecond(syncSecond));
// initialize columns
- RcFileCompressor compressor = codecName.map(codecFactory::createCompressor).orElse(new NoneCompressor());
- keySectionOutput = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes());
- keySectionOutput.close(); // output is recycled on first use which requires output to be closed
+ Optional<Codec> codec = compressionKind.map(CompressionKind::createCodec);
+ keySectionOutput = createMemoryCompressedSliceOutput(codec);
+ keySectionOutput.close(); // output is recycled on first use which requires the output to be closed
columnEncoders = new ColumnEncoder[types.size()];
for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) {
Type type = types.get(columnIndex);
ColumnEncoding columnEncoding = encoding.getEncoding(type);
- columnEncoders[columnIndex] = new ColumnEncoder(columnEncoding, compressor);
+ columnEncoders[columnIndex] = new ColumnEncoder(columnEncoding, codec);
}
this.targetMinRowGroupSize = toIntExact(targetMinRowGroupSize.toBytes());
this.targetMaxRowGroupSize = toIntExact(targetMaxRowGroupSize.toBytes());
}
private void writeMetadataProperty(String key, String value)
+ throws IOException
{
writeLengthPrefixedString(output, utf8Slice(key));
writeLengthPrefixedString(output, utf8Slice(value));
@@ -210,16 +211,15 @@ private void recordValidation(Consumer<RcFileWriteValidationBuilder> task)
}
}
- public void validate(RcFileDataSource input)
+ public void validate(TrinoInputFile inputFile)
throws RcFileCorruptionException
{
checkState(validationBuilder != null, "validation is not enabled");
validateFile(
validationBuilder.build(),
- input,
+ inputFile,
encoding,
- types,
- codecFactory);
+ types);
}
public long getRetainedSizeInBytes()
@@ -239,7 +239,7 @@ public void write(Page page)
if (page.getPositionCount() == 0) {
return;
}
- List<Page> pages = splitPage(page, targetMaxRowGroupSize);
+ List<Page> pages = PageSplitterUtil.splitPage(page, targetMaxRowGroupSize);
for (Page splitPage : pages) {
bufferPage(splitPage);
}
@@ -310,14 +310,14 @@ private void writeRowGroup()
output.writeInt(Integer.reverseBytes(keySectionOutput.size()));
output.writeInt(Integer.reverseBytes(keySectionOutput.getCompressedSize()));
for (Slice slice : keySectionOutput.getCompressedSlices()) {
- output.writeBytes(slice);
+ output.write(slice);
}
// write value section
for (ColumnEncoder columnEncoder : columnEncoders) {
List<Slice> slices = columnEncoder.getCompressedData();
for (Slice slice : slices) {
- output.writeBytes(slice);
+ output.write(slice);
}
columnEncoder.reset();
}
@@ -327,6 +327,15 @@ private void writeRowGroup()
bufferedRows = 0;
}
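+ // falls back to an uncompressed in-memory output when no codec is configured, so callers can
+ // treat compressed and uncompressed row groups uniformly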
+ private static MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(Optional<Codec> codec)
+ throws IOException
+ {
+ if (codec.isPresent()) {
+ return codec.get().createMemoryCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes());
+ }
+ return MemoryCompressedSliceOutput.createUncompressedMemorySliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes());
+ }
+
private static class ColumnEncoder
{
private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(ColumnEncoder.class).instanceSize() + ClassLayout.parseClass(ColumnEncodeOutput.class).instanceSize());
@@ -337,14 +346,15 @@ private static class ColumnEncoder
private final SliceOutput lengthOutput = new DynamicSliceOutput(512);
- private CompressedSliceOutput output;
+ private MemoryCompressedSliceOutput output;
private boolean columnClosed;
- public ColumnEncoder(ColumnEncoding columnEncoding, RcFileCompressor compressor)
+ public ColumnEncoder(ColumnEncoding columnEncoding, Optional<Codec> codec)
+ throws IOException
{
this.columnEncoding = columnEncoding;
- this.output = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes());
+ this.output = createMemoryCompressedSliceOutput(codec);
this.encodeOutput = new ColumnEncodeOutput(lengthOutput, output);
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/TimestampHolder.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/TimestampHolder.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java
index c770ec18a18e..cb0f38341984 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/TimestampHolder.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import io.trino.spi.block.Block;
import io.trino.spi.type.LongTimestamp;
@@ -33,7 +33,7 @@ public final class TimestampHolder
private final long seconds;
private final int nanosOfSecond;
- public TimestampHolder(long epochMicros, int picosOfMicro)
+ private TimestampHolder(long epochMicros, int picosOfMicro)
{
this.seconds = floorDiv(epochMicros, MICROSECONDS_PER_SECOND);
long picosOfSecond = (long) floorMod(epochMicros, MICROSECONDS_PER_SECOND) * PICOSECONDS_PER_MICROSECOND + picosOfMicro;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ValidationHash.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java
similarity index 98%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ValidationHash.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java
index 2f90bf94dfbd..dd1f992b0fc5 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ValidationHash.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import io.trino.spi.block.Block;
import io.trino.spi.function.InvocationConvention;
@@ -59,7 +59,7 @@ class ValidationHash
}
}
- // This should really come from the environment, but there is not good way to get a value here
+ // This should really come from the environment, but there is no good way to get a value here
private static final TypeOperators VALIDATION_TYPE_OPERATORS_CACHE = new TypeOperators();
public static ValidationHash createValidationHash(Type type)
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java
similarity index 91%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java
index 2ef66e228c70..c00fc5bfce3c 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java
@@ -11,11 +11,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnEncoding;
+import io.trino.hive.formats.rcfile.ColumnEncoding;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java
similarity index 87%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java
index 1e4033f5a258..7cd095afd296 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java
@@ -11,19 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
+import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize;
+import static io.trino.hive.formats.ReadWriteUtils.readVInt;
+import static io.trino.hive.formats.ReadWriteUtils.writeVInt;
import static java.lang.Math.toIntExact;
public class BinaryEncoding
@@ -55,7 +55,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut
public void encodeValueInto(Block block, int position, SliceOutput output)
{
Slice slice = type.getSlice(block, position);
- // Note binary nested in complex structures do no use the empty marker.
+ // Note binary nested in complex structures do not use the empty marker.
// Therefore, empty VARBINARY values are ok.
writeVInt(output, slice.length());
output.writeBytes(slice);
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java
similarity index 93%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java
index ac085212816f..60fd3760b832 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java
@@ -11,10 +11,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
-import io.trino.rcfile.ColumnEncoding;
-import io.trino.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.ColumnEncoding;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import org.joda.time.DateTimeZone;
@@ -127,7 +127,7 @@ public ColumnEncoding structEncoding(Type type, List<ColumnEncoding> fieldEncodi
return new StructEncoding(
type,
fieldEncodings.stream()
- .map(field -> (BinaryColumnEncoding) field)
+ .map(BinaryColumnEncoding.class::cast)
.collect(Collectors.toList()));
}
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BlockEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java
index 0327dfc39852..4f5e329a148c 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BlockEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java
similarity index 94%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java
index fa1b0e3fb9df..04f1e2a6bec9 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ByteEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java
similarity index 94%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ByteEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java
index 4e78a098d6cf..d41dc44c58ee 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ByteEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java
similarity index 88%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DateEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java
index 78f0528239e1..3b872c313d9d 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DateEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java
@@ -11,19 +11,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
+import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize;
+import static io.trino.hive.formats.ReadWriteUtils.readVInt;
+import static io.trino.hive.formats.ReadWriteUtils.writeVInt;
import static java.lang.Math.toIntExact;
public class DateEncoding
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java
index 4df1218e111a..cf2eddeaad7a 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.DecimalType;
@@ -28,9 +28,9 @@
import java.math.BigInteger;
import static com.google.common.base.Preconditions.checkState;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
+import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize;
+import static io.trino.hive.formats.ReadWriteUtils.readVInt;
+import static io.trino.hive.formats.ReadWriteUtils.writeVInt;
import static io.trino.spi.type.Decimals.rescale;
import static java.lang.Math.toIntExact;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java
index 338b67b3939b..c0acf5f96343 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/FloatEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java
index b3ff029ed84f..d6dd88f6474c 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/FloatEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java
similarity index 88%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ListEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java
index 0cd8690ee523..ca455c4d0ae7 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ListEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java
@@ -11,17 +11,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
+import io.trino.hive.formats.ReadWriteUtils;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
import static java.lang.Math.toIntExact;
public class ListEncoding
@@ -39,7 +37,7 @@ public ListEncoding(Type type, BinaryColumnEncoding elementEncoding)
public void encodeValue(Block block, int position, SliceOutput output)
{
Block list = block.getObject(position, Block.class);
- writeVInt(output, list.getPositionCount());
+ ReadWriteUtils.writeVInt(output, list.getPositionCount());
// write null bits
int nullByte = 0;
@@ -66,8 +64,8 @@ public void encodeValue(Block block, int position, SliceOutput output)
public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length)
{
// entries in list
- int entries = toIntExact(readVInt(slice, offset));
- offset += decodeVIntSize(slice.getByte(offset));
+ int entries = toIntExact(ReadWriteUtils.readVInt(slice, offset));
+ offset += ReadWriteUtils.decodeVIntSize(slice.getByte(offset));
// null bytes
int nullByteCur = offset;
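
Aside: encodeValue above writes a VInt element count and then packed null flags, one bit per element. An illustrative sketch of that packing, following the pattern visible in this hunk (not the exact Trino code; Block and SliceOutput are the types already imported above):

    static void writeNullFlags(Block list, SliceOutput output)
    {
        int nullByte = 0;
        for (int i = 0; i < list.getPositionCount(); i++) {
            if (!list.isNull(i)) {
                nullByte |= 1 << (i % 8);   // set bit = element is present
            }
            if (i % 8 == 7) {
                output.writeByte(nullByte); // flush a full byte of flags
                nullByte = 0;
            }
        }
        if (list.getPositionCount() % 8 != 0) {
            output.writeByte(nullByte);     // trailing partial byte
        }
    }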
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java
similarity index 78%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/LongEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java
index 622bf0d5592d..c8f60434526f 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/LongEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java
@@ -11,20 +11,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.ReadWriteUtils;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVLong;
-
public class LongEncoding
implements BinaryColumnEncoding
{
@@ -40,7 +37,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut
{
for (int position = 0; position < block.getPositionCount(); position++) {
if (!block.isNull(position)) {
- writeVLong(output, type.getLong(block, position));
+ ReadWriteUtils.writeVLong(output, type.getLong(block, position));
}
encodeOutput.closeEntry();
}
@@ -49,7 +46,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut
@Override
public void encodeValueInto(Block block, int position, SliceOutput output)
{
- writeVLong(output, type.getLong(block, position));
+ ReadWriteUtils.writeVLong(output, type.getLong(block, position));
}
@Override
@@ -66,7 +63,7 @@ public Block decodeColumn(ColumnData columnData)
builder.appendNull();
}
else {
- type.writeLong(builder, readVInt(slice, offset, length));
+ type.writeLong(builder, ReadWriteUtils.readVInt(slice, offset, length));
}
}
return builder.build();
@@ -81,12 +78,12 @@ public int getValueOffset(Slice slice, int offset)
@Override
public int getValueLength(Slice slice, int offset)
{
- return decodeVIntSize(slice, offset);
+ return ReadWriteUtils.decodeVIntSize(slice, offset);
}
@Override
public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length)
{
- type.writeLong(builder, readVInt(slice, offset, length));
+ type.writeLong(builder, ReadWriteUtils.readVInt(slice, offset, length));
}
}
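
Aside: LongEncoding now routes all variable-length long framing through ReadWriteUtils. For reference, a sketch of that framing as defined by Hadoop's WritableUtils.writeVLong, with which this format stays wire-compatible:

    static void writeVLong(SliceOutput output, long value)
    {
        if (value >= -112 && value <= 127) {
            output.writeByte((byte) value);  // small values fit in the marker byte
            return;
        }
        int marker = -112;
        if (value < 0) {
            value = ~value;                  // negatives stored as one's complement
            marker = -120;
        }
        for (long tmp = value; tmp != 0; tmp >>>= 8) {
            marker--;                        // each payload byte lowers the marker
        }
        output.writeByte((byte) marker);
        int payloadBytes = (marker < -120) ? -(marker + 120) : -(marker + 112);
        for (int i = payloadBytes; i != 0; i--) {
            output.writeByte((byte) (value >>> ((i - 1) * 8)));  // big-endian payload
        }
    }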
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java
similarity index 92%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/MapEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java
index 88c049dfa31a..6886db230e5c 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/MapEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java
@@ -11,19 +11,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
+import io.trino.hive.formats.ReadWriteUtils;
import io.trino.spi.StandardErrorCode;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
import static java.lang.Math.toIntExact;
public class MapEncoding
@@ -45,7 +43,7 @@ public void encodeValue(Block block, int position, SliceOutput output)
Block map = block.getObject(position, Block.class);
// write entry count
- writeVInt(output, map.getPositionCount() / 2);
+ ReadWriteUtils.writeVInt(output, map.getPositionCount() / 2);
// write null bits
int nullByte = 0b0101_0101;
@@ -86,8 +84,8 @@ public void encodeValue(Block block, int position, SliceOutput output)
public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length)
{
// entries in list
- int entries = toIntExact(readVInt(slice, offset));
- offset += decodeVIntSize(slice.getByte(offset));
+ int entries = toIntExact(ReadWriteUtils.readVInt(slice, offset));
+ offset += ReadWriteUtils.decodeVIntSize(slice.getByte(offset));
// null bytes
int nullByteCur = offset;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ShortEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java
similarity index 91%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ShortEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java
index 7269e942afd4..d3b8a2ccecbc 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ShortEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
@@ -62,7 +62,7 @@ public Block decodeColumn(ColumnData columnData)
int length = columnData.getLength(i);
if (length != 0) {
checkState(length == SIZE_OF_SHORT, "Short should be 2 bytes");
- type.writeLong(builder, (long) Short.reverseBytes(slice.getShort(columnData.getOffset(i))));
+ type.writeLong(builder, Short.reverseBytes(slice.getShort(columnData.getOffset(i))));
}
else {
builder.appendNull();
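
Aside: the dropped (long) cast was redundant, since the short widens implicitly. The reverseBytes call exists because airlift slices read little-endian while the on-disk format stores shorts big-endian; an illustrative round trip:

    // On-disk bytes {0x5C, 0x6D} encode the big-endian short 0x5C6D (23661).
    // slice.getShort reads them little-endian as 0x6D5C; reverseBytes restores 0x5C6D.
    short value = Short.reverseBytes(slice.getShort(offset));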
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java
similarity index 84%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StringEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java
index edb2b387caba..eebf717aa4b9 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StringEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java
@@ -11,21 +11,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
import static io.airlift.slice.Slices.EMPTY_SLICE;
-import static io.trino.rcfile.RcFileDecoderUtils.calculateTruncationLength;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
+import static io.trino.hive.formats.ReadWriteUtils.calculateTruncationLength;
+import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize;
+import static io.trino.hive.formats.ReadWriteUtils.readVInt;
+import static io.trino.hive.formats.ReadWriteUtils.writeVInt;
import static java.lang.Math.toIntExact;
public class StringEncoding
@@ -61,7 +61,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut
public void encodeValueInto(Block block, int position, SliceOutput output)
{
Slice slice = type.getSlice(block, position);
- // Note strings nested in complex structures do no use the empty string marker
+ // Note strings nested in complex structures do not use the empty string marker
writeVInt(output, slice.length());
output.writeBytes(slice);
}
@@ -107,7 +107,7 @@ public int getValueLength(Slice slice, int offset)
@Override
public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length)
{
- // Note strings nested in complex structures do no use the empty string marker
+ // Note strings nested in complex structures do not use the empty string marker
length = calculateTruncationLength(type, slice, offset, length);
type.writeSlice(builder, slice, offset, length);
}
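
Aside: calculateTruncationLength (now in ReadWriteUtils) caps bounded VARCHAR/CHAR values at their declared length. A hedged sketch of the code-point counting such a cap requires, purely for illustration (truncateToCodePoints is a hypothetical name; the real rules live in ReadWriteUtils):

    // Returns the byte length of at most maxCodePoints UTF-8 code points;
    // continuation bytes match the pattern 0b10xx_xxxx and are not counted.
    static int truncateToCodePoints(Slice slice, int offset, int length, int maxCodePoints)
    {
        int codePoints = 0;
        for (int i = 0; i < length; i++) {
            if ((slice.getByte(offset + i) & 0xC0) != 0x80) {  // lead byte
                if (codePoints == maxCodePoints) {
                    return i;  // cut before this code point starts
                }
                codePoints++;
            }
        }
        return length;
    }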
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StructEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java
index 9f7d48145fc9..1532be23fad9 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StructEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
@@ -89,8 +89,7 @@ public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int l
fieldId++;
}
- // Some times a struct does not have all fields written
- // so we fill with nulls
+ // Sometimes a struct does not have all fields written, so we fill with nulls
while (fieldId < structFields.size()) {
rowBuilder.appendNull();
fieldId++;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java
similarity index 87%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java
index 2f7d7134b902..89d7b3fb6de4 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java
@@ -11,15 +11,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.binary;
+package io.trino.hive.formats.rcfile.binary;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
+import io.trino.hive.formats.ReadWriteUtils;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.TimestampHolder;
import io.trino.plugin.base.type.DecodedTimestamp;
import io.trino.plugin.base.type.TrinoTimestampEncoder;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
-import io.trino.rcfile.TimestampHolder;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.TimestampType;
@@ -29,10 +30,6 @@
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder;
-import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize;
-import static io.trino.rcfile.RcFileDecoderUtils.isNegativeVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.readVInt;
-import static io.trino.rcfile.RcFileDecoderUtils.writeVInt;
import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_SECOND;
import static java.util.Objects.requireNonNull;
@@ -100,12 +97,12 @@ public int getValueLength(Slice slice, int offset)
{
int length = 4;
if (hasNanosVInt(slice.getByte(offset))) {
- int nanosVintLength = decodeVIntSize(slice, offset + 4);
+ int nanosVintLength = ReadWriteUtils.decodeVIntSize(slice, offset + 4);
length += nanosVintLength;
// is there extra data for "seconds"
- if (isNegativeVInt(slice, offset + 4)) {
- length += decodeVIntSize(slice, offset + 4 + nanosVintLength);
+ if (ReadWriteUtils.isNegativeVInt(slice, offset + 4)) {
+ length += ReadWriteUtils.decodeVIntSize(slice, offset + 4 + nanosVintLength);
}
}
return length;
@@ -123,7 +120,7 @@ private static boolean hasNanosVInt(byte b)
return (b >> 7) != 0;
}
- private DecodedTimestamp getTimestamp(Slice slice, int offset)
+ private static DecodedTimestamp getTimestamp(Slice slice, int offset)
{
// read seconds (low 32 bits)
int lowest31BitsOfSecondsAndFlag = Integer.reverseBytes(slice.getInt(offset));
@@ -133,18 +130,18 @@ private DecodedTimestamp getTimestamp(Slice slice, int offset)
int nanos = 0;
if (lowest31BitsOfSecondsAndFlag < 0) {
// read nanos
- // this is an inline version of readVint so it can be stitched together
+ // this is an inline version of readVInt, so it can be stitched together with
// the code to read the seconds high bits below
byte nanosFirstByte = slice.getByte(offset);
- int nanosLength = decodeVIntSize(nanosFirstByte);
- nanos = (int) readVInt(slice, offset, nanosLength);
+ int nanosLength = ReadWriteUtils.decodeVIntSize(nanosFirstByte);
+ nanos = (int) ReadWriteUtils.readVInt(slice, offset, nanosLength);
nanos = decodeNanos(nanos);
// read seconds (high 32 bits)
- if (isNegativeVInt(nanosFirstByte)) {
+ if (ReadWriteUtils.isNegativeVInt(nanosFirstByte)) {
// We compose the seconds field from two parts. The lowest 31 bits come from the first four
// bytes. The higher-order bits come from the second VInt that follows the nanos field.
- long highBits = readVInt(slice, offset + nanosLength);
+ long highBits = ReadWriteUtils.readVInt(slice, offset + nanosLength);
seconds |= (highBits << 31);
}
}
@@ -208,12 +205,12 @@ private static void writeTimestamp(long seconds, int nanos, SliceOutput output)
if (hasSecondsHigh32 || nanosReversed != 0) {
// The sign of the reversed-nanoseconds field indicates that there is a second VInt present
int value = hasSecondsHigh32 ? ~nanosReversed : nanosReversed;
- writeVInt(output, value);
+ ReadWriteUtils.writeVInt(output, value);
}
if (hasSecondsHigh32) {
int secondsHigh32 = (int) (seconds >> 31);
- writeVInt(output, secondsHigh32);
+ ReadWriteUtils.writeVInt(output, secondsHigh32);
}
}
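
Aside: the binary timestamp layout handled above is a 4-byte seconds header whose sign bit flags a trailing nanos VInt; the nanos VInt's own sign bit flags a further VInt carrying bits 31 and up of the seconds. An illustrative decode of just the header, matching getTimestamp:

    static long readHeaderSeconds(Slice slice, int offset)
    {
        // the header int is stored big-endian; airlift reads little-endian, hence the swap
        int header = Integer.reverseBytes(slice.getInt(offset));
        long seconds = header & 0x7FFF_FFFFL;  // low 31 bits of the seconds value
        // header < 0 means a nanos VInt follows at offset + 4; if that VInt is
        // negative, the extra VInt supplies the high bits of the seconds value
        return seconds;
    }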
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BinaryEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java
index dba5c0912aea..70a201e95809 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BinaryEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java
similarity index 93%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BlockEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java
index e4c9b486dd90..49ebb57f6ca4 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BlockEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BooleanEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java
index b540ff0b39d0..16fcc8d3db27 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BooleanEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DateEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java
index 8ede2e33cd6d..e1bb3f9628d8 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DateEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
@@ -88,7 +88,6 @@ public Block decodeColumn(ColumnData columnData)
builder.appendNull();
}
else {
- //noinspection deprecation
type.writeLong(builder, parseDate(slice, offset, length));
}
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java
similarity index 97%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DecimalEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java
index 4fff1d62ea1b..0d1b254c5243 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DecimalEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.DecimalType;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java
similarity index 94%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DoubleEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java
index 18eda352b42c..425349c6facb 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DoubleEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java
similarity index 94%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/FloatEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java
index 6f64f52d5503..0fba42219358 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/FloatEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/ListEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java
index 140a445633ab..87f0800ac6ce 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/ListEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java
@@ -11,11 +11,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/LongEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java
index 280064a3f496..e9253fa8bd5e 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/LongEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java
@@ -11,13 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java
similarity index 97%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/MapEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java
index ed5731e47fc3..9000401c8b99 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/MapEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java
@@ -11,11 +11,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.StandardErrorCode;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java
similarity index 95%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StringEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java
index be7a1a8bf427..3fc5fa753638 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StringEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java
@@ -11,18 +11,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
-import static io.trino.rcfile.RcFileDecoderUtils.calculateTruncationLength;
+import static io.trino.hive.formats.ReadWriteUtils.calculateTruncationLength;
public class StringEncoding
implements TextColumnEncoding
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java
similarity index 97%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StructEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java
index d70941058fc8..560cbdfda5c8 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StructEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java
@@ -11,11 +11,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java
similarity index 86%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java
index 91d2890bead5..4f3da2c52ac0 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.trino.rcfile.ColumnEncoding;
-import io.trino.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.ColumnEncoding;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java
similarity index 97%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java
index 8186ee23d366..4eb1781ab642 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java
@@ -11,12 +11,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
-import io.trino.rcfile.ColumnEncoding;
-import io.trino.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.ColumnEncoding;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java
similarity index 96%
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TimestampEncoding.java
rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java
index 2230fdf7bb6b..a4630a42b11b 100644
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TimestampEncoding.java
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java
@@ -11,15 +11,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile.text;
+package io.trino.hive.formats.rcfile.text;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
+import io.trino.hive.formats.rcfile.ColumnData;
+import io.trino.hive.formats.rcfile.EncodeOutput;
+import io.trino.hive.formats.rcfile.TimestampHolder;
import io.trino.plugin.base.type.DecodedTimestamp;
import io.trino.plugin.base.type.TrinoTimestampEncoder;
-import io.trino.rcfile.ColumnData;
-import io.trino.rcfile.EncodeOutput;
-import io.trino.rcfile.TimestampHolder;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.TimestampType;
diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java
new file mode 100644
index 000000000000..290e5a709714
--- /dev/null
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import org.openjdk.jol.info.ClassLayout;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.ThreadLocalRandom;
+
+import static org.testng.Assert.assertEquals;
+
+public class TestDataOutputStream
+{
+ @Test
+ public void testEncodingBoolean()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeBoolean(true),
+ new byte[] {1});
+ assertEncoding(sliceOutput -> sliceOutput.writeBoolean(false),
+ new byte[] {0});
+ }
+
+ @Test
+ public void testEncodingByte()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeByte(92),
+ new byte[] {92});
+ assertEncoding(sliceOutput -> sliceOutput.writeByte(156),
+ new byte[] {-100});
+ assertEncoding(sliceOutput -> sliceOutput.writeByte(-17),
+ new byte[] {-17});
+
+ assertEncoding(sliceOutput -> sliceOutput.write(92),
+ new byte[] {92});
+ assertEncoding(sliceOutput -> sliceOutput.write(156),
+ new byte[] {-100});
+ assertEncoding(sliceOutput -> sliceOutput.write(-17),
+ new byte[] {-17});
+ }
+
+ @Test
+ public void testEncodingShort()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeShort(23661),
+ new byte[] {109, 92});
+ assertEncoding(sliceOutput -> sliceOutput.writeShort(40045),
+ new byte[] {109, -100});
+ assertEncoding(sliceOutput -> sliceOutput.writeShort(-27188),
+ new byte[] {-52, -107});
+ }
+
+ @Test
+ public void testEncodingInteger()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeInt(978017389),
+ new byte[] {109, 92, 75, 58});
+ assertEncoding(sliceOutput -> sliceOutput.writeInt(-7813904),
+ new byte[] {-16, -60, -120, -1});
+ }
+
+ @Test
+ public void testEncodingLong()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeLong(9214541725452766769L),
+ new byte[] {49, -114, -96, -23, -32, -96, -32, 127});
+ assertEncoding(sliceOutput -> sliceOutput.writeLong(-1184314682315678611L),
+ new byte[] {109, 92, 75, 58, 18, 120, -112, -17});
+ }
+
+ @Test
+ public void testEncodingDouble()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeDouble(3.14),
+ new byte[] {31, -123, -21, 81, -72, 30, 9, 64});
+ assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.NaN),
+ new byte[] {0, 0, 0, 0, 0, 0, -8, 127});
+ assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.NEGATIVE_INFINITY),
+ new byte[] {0, 0, 0, 0, 0, 0, -16, -1});
+ assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.POSITIVE_INFINITY),
+ new byte[] {0, 0, 0, 0, 0, 0, -16, 127});
+ }
+
+ @Test
+ public void testEncodingFloat()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeFloat(3.14f),
+ new byte[] {-61, -11, 72, 64});
+ assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.NaN),
+ new byte[] {0, 0, -64, 127});
+ assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.NEGATIVE_INFINITY),
+ new byte[] {0, 0, -128, -1});
+ assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.POSITIVE_INFINITY),
+ new byte[] {0, 0, -128, 127});
+ }
+
+ @Test
+ public void testEncodingBytes()
+ throws Exception
+ {
+ byte[] data = new byte[18000];
+ ThreadLocalRandom.current().nextBytes(data);
+
+ assertEncoding(sliceOutput -> sliceOutput.write(data), data);
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 0), Arrays.copyOfRange(data, 0, 0));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 3), Arrays.copyOfRange(data, 0, 3));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 370), Arrays.copyOfRange(data, 0, 370));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 4095), Arrays.copyOfRange(data, 0, 4095));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 4096), Arrays.copyOfRange(data, 0, 4096));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 12348), Arrays.copyOfRange(data, 0, 12348));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 16384), Arrays.copyOfRange(data, 0, 16384));
+ assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 18000), Arrays.copyOfRange(data, 0, 18000));
+ }
+
+ @Test
+ public void testEncodingSlice()
+ throws Exception
+ {
+ byte[] data = new byte[18000];
+ ThreadLocalRandom.current().nextBytes(data);
+ Slice slice = Slices.wrappedBuffer(data);
+
+ assertEncoding(sliceOutput -> sliceOutput.write(slice), data);
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 0), Arrays.copyOfRange(data, 0, 0));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 3), Arrays.copyOfRange(data, 0, 3));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 370), Arrays.copyOfRange(data, 0, 370));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 4095), Arrays.copyOfRange(data, 0, 4095));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 4096), Arrays.copyOfRange(data, 0, 4096));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 12348), Arrays.copyOfRange(data, 0, 12348));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 16384), Arrays.copyOfRange(data, 0, 16384));
+ assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 18000), Arrays.copyOfRange(data, 0, 18000));
+ }
+
+ @Test
+ public void testWriteZero()
+ throws Exception
+ {
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(0), new byte[0]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(1), new byte[1]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(2), new byte[2]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(3), new byte[3]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(4), new byte[4]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(6), new byte[6]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(7), new byte[7]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(8), new byte[8]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(9), new byte[9]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(16), new byte[16]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(22), new byte[22]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(227), new byte[227]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(4227), new byte[4227]);
+ assertEncoding(sliceOutput -> sliceOutput.writeZero(18349), new byte[18349]);
+ }
+
+ @Test
+ public void testRetainedSize()
+ throws IOException
+ {
+ int bufferSize = 1337;
+ DataOutputStream output = new DataOutputStream(new ByteArrayOutputStream(0), bufferSize);
+
+ long originalRetainedSize = output.getRetainedSize();
+ assertEquals(originalRetainedSize, ClassLayout.parseClass(DataOutputStream.class).instanceSize() + Slices.allocate(bufferSize).getRetainedSize());
+ output.writeLong(0);
+ output.writeShort(0);
+ assertEquals(output.getRetainedSize(), originalRetainedSize);
+ }
+
+ /**
+ * Asserts each operation at a range of starting offsets, including values around the 16384-byte internal buffer boundary.
+ */
+ private static void assertEncoding(DataOutputTester operations, byte... expected)
+ throws IOException
+ {
+ assertEncoding(operations, 0, expected);
+ assertEncoding(operations, 1, expected);
+ assertEncoding(operations, 2, expected);
+ assertEncoding(operations, 3, expected);
+ assertEncoding(operations, 4, expected);
+ assertEncoding(operations, 7, expected);
+ assertEncoding(operations, 8, expected);
+ assertEncoding(operations, 16, expected);
+ assertEncoding(operations, 511, expected);
+ assertEncoding(operations, 12000, expected);
+ assertEncoding(operations, 13000, expected);
+ assertEncoding(operations, 16000, expected);
+ assertEncoding(operations, 16380, expected);
+ assertEncoding(operations, 16383, expected);
+ assertEncoding(operations, 16384, expected);
+ assertEncoding(operations, 18349, expected);
+ }
+
+ private static void assertEncoding(DataOutputTester operations, int offset, byte... output)
+ throws IOException
+ {
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ try (DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream, 16384)) {
+ dataOutputStream.writeZero(offset);
+ operations.test(dataOutputStream);
+ assertEquals(dataOutputStream.longSize(), offset + output.length);
+ }
+
+ byte[] expected = new byte[offset + output.length];
+ System.arraycopy(output, 0, expected, offset, output.length);
+ assertEquals(byteArrayOutputStream.toByteArray(), expected);
+ }
+
+ private interface DataOutputTester
+ {
+ void test(DataOutputStream dataOutputStream)
+ throws IOException;
+ }
+}
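
Aside: this test exercises DataOutputStream, the buffered output abstraction added to trino-hive-formats. A minimal usage sketch based only on the API the test itself relies on (buffer-size constructor, longSize, flush-on-close):

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (DataOutputStream out = new DataOutputStream(sink, 16384)) {
        out.writeInt(42);
        out.writeDouble(3.14);
        // longSize() reports bytes written so far, buffered or not
        assertEquals(out.longSize(), 12);
    }
    // close() flushes the internal buffer to the underlying stream
    byte[] written = sink.toByteArray();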
diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java
new file mode 100644
index 000000000000..8b91d6c820a3
--- /dev/null
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java
@@ -0,0 +1,733 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.ByteSource;
+import com.google.common.io.ByteStreams;
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import io.trino.filesystem.memory.MemorySeekableInputStream;
+import org.apache.iceberg.io.SeekableInputStream;
+import org.openjdk.jol.info.ClassLayout;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.List;
+
+import static com.google.common.collect.Iterables.cycle;
+import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
+import static io.airlift.slice.SizeOf.SIZE_OF_DOUBLE;
+import static io.airlift.slice.SizeOf.SIZE_OF_FLOAT;
+import static io.airlift.slice.SizeOf.SIZE_OF_INT;
+import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
+import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
+import static io.airlift.slice.SizeOf.sizeOfByteArray;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+@SuppressWarnings("resource")
+public class TestDataSeekableInputStream
+{
+ private static final int BUFFER_SIZE = 129;
+
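+ // read sizes straddle BUFFER_SIZE (129) to exercise both in-buffer reads and buffer refills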
+ private static final List&lt;Integer&gt; VARIABLE_READ_SIZES = ImmutableList.of(
+ 1,
+ 7,
+ 15,
+ BUFFER_SIZE - 1,
+ BUFFER_SIZE,
+ BUFFER_SIZE + 1,
+ BUFFER_SIZE + 13);
+
+ @Test
+ public void testReadBoolean()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_BYTE)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeBoolean(valueIndex % 2 == 0);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readBoolean(), valueIndex % 2 == 0);
+ }
+ });
+ }
+
+ @Test
+ public void testReadByte()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_BYTE)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeByte((byte) valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readByte(), (byte) valueIndex);
+ }
+ });
+ }
+
+ @Test
+ public void testRead()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_BYTE)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeByte((byte) valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.read(), valueIndex & 0xFF);
+ }
+
+ @Override
+ public void verifyReadOffEnd(DataSeekableInputStream input)
+ throws IOException
+ {
+ assertEquals(input.read(), -1);
+ }
+ });
+ }
+
+ @Test
+ public void testReadShort()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_SHORT)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeShort(valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readShort(), (short) valueIndex);
+ }
+ });
+ }
+
+ @Test
+ public void testReadUnsignedShort()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_SHORT)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeShort(valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readUnsignedShort(), valueIndex & 0xFFFF);
+ }
+ });
+ }
+
+ @Test
+ public void testReadInt()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_INT)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeInt(valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readInt(), valueIndex);
+ }
+ });
+ }
+
+ @Test
+ public void testReadUnsignedInt()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_INT)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeInt(valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readUnsignedInt(), valueIndex);
+ }
+ });
+ }
+
+ @Test
+ public void testReadLong()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_LONG)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeLong(valueIndex);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readLong(), valueIndex);
+ }
+ });
+ }
+
+ @Test
+ public void testReadFloat()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_FLOAT)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeFloat(valueIndex + 0.12f);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readFloat(), valueIndex + 0.12f);
+ }
+ });
+ }
+
+ @Test
+ public void testReadDouble()
+ throws IOException
+ {
+ testDataInput(new DataInputTester(SIZE_OF_DOUBLE)
+ {
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.writeDouble(valueIndex + 0.12);
+ }
+
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ assertEquals(input.readDouble(), valueIndex + 0.12);
+ }
+ });
+ }
+
+ @Test
+ public void testSkip()
+ throws IOException
+ {
+ for (int readSize : VARIABLE_READ_SIZES) {
+ // skip without any reads
+ testDataInput(new SkipDataInputTester(readSize)
+ {
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ input.skip(valueSize());
+ }
+
+ @Override
+ public void verifyReadOffEnd(DataSeekableInputStream input)
+ throws IOException
+ {
+ assertEquals(input.skip(valueSize()), valueSize() - 1);
+ }
+ });
+ testDataInput(new SkipDataInputTester(readSize)
+ {
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ input.skipBytes(valueSize());
+ }
+
+ @Override
+ public void verifyReadOffEnd(DataSeekableInputStream input)
+ throws IOException
+ {
+ assertEquals(input.skip(valueSize()), valueSize() - 1);
+ }
+ });
+
+ // read when no data available to force buffering
+ testDataInput(new SkipDataInputTester(readSize)
+ {
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ int length = valueSize();
+ while (length > 0) {
+ if (input.available() == 0) {
+ input.readByte();
+ length--;
+ }
+ int skipSize = input.skipBytes(length);
+ length -= skipSize;
+ }
+ assertEquals(input.skip(0), 0);
+ }
+ });
+ testDataInput(new SkipDataInputTester(readSize)
+ {
+ @Override
+ public void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ long length = valueSize();
+ while (length > 0) {
+ if (input.available() == 0) {
+ input.readByte();
+ length--;
+ }
+ long skipSize = input.skip(length);
+ length -= skipSize;
+ }
+ assertEquals(input.skip(0), 0);
+ }
+ });
+ }
+ }
+
+ @Test
+ public void testReadSlice()
+ throws IOException
+ {
+ for (int readSize : VARIABLE_READ_SIZES) {
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ return input.readSlice(valueSize()).toStringUtf8();
+ }
+ });
+ }
+ }
+
+ @Test
+ public void testReadFully()
+ throws IOException
+ {
+ for (int readSize : VARIABLE_READ_SIZES) {
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ Slice slice = Slices.allocate(valueSize());
+ input.readFully(slice);
+ return slice.toStringUtf8();
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ Slice slice = Slices.allocate(valueSize() + 10);
+ input.readFully(slice, 5, valueSize());
+ return slice.slice(5, valueSize()).toStringUtf8();
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ byte[] bytes = new byte[valueSize()];
+ input.readFully(bytes, 0, valueSize());
+ return new String(bytes, 0, valueSize(), UTF_8);
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ byte[] bytes = new byte[valueSize() + 10];
+ input.readFully(bytes, 5, valueSize());
+ return new String(bytes, 5, valueSize(), UTF_8);
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ byte[] bytes = new byte[valueSize()];
+ int bytesRead = input.read(bytes);
+ if (bytesRead == -1) {
+ throw new EOFException();
+ }
+ assertTrue(bytesRead > 0, "Expected to read at least one byte");
+ input.readFully(bytes, bytesRead, bytes.length - bytesRead);
+ return new String(bytes, 0, valueSize(), UTF_8);
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ byte[] bytes = new byte[valueSize() + 10];
+ ByteStreams.readFully(input, bytes, 5, valueSize());
+ return new String(bytes, 5, valueSize(), UTF_8);
+ }
+ });
+ testDataInput(new StringDataInputTester(readSize)
+ {
+ @Override
+ public String readActual(DataSeekableInputStream input)
+ throws IOException
+ {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ input.readFully(out, valueSize());
+ return out.toString(UTF_8);
+ }
+ });
+ }
+ }
+
+ @Test
+ public void testEmptyInput()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]);
+ assertEquals(input.getPos(), 0);
+ }
+
+ @Test
+ public void testEmptyRead()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]);
+ assertEquals(input.read(), -1);
+ }
+
+ @Test(expectedExceptions = EOFException.class)
+ public void testReadByteBeyondEnd()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]);
+ input.readByte();
+ }
+
+ @Test(expectedExceptions = EOFException.class)
+ public void testReadShortBeyondEnd()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[1]);
+ input.readShort();
+ }
+
+ @Test(expectedExceptions = EOFException.class)
+ public void testReadIntBeyondEnd()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[3]);
+ input.readInt();
+ }
+
+ @Test(expectedExceptions = EOFException.class)
+ public void testReadLongBeyondEnd()
+ throws Exception
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(new byte[7]);
+ input.readLong();
+ }
+
+ @Test
+ public void testEncodingBoolean()
+ throws Exception
+ {
+ assertTrue(createDataSeekableInputStream(new byte[] {1}).readBoolean());
+ assertFalse(createDataSeekableInputStream(new byte[] {0}).readBoolean());
+ }
+
+ @Test
+ public void testEncodingByte()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {92}).readByte(), 92);
+ assertEquals(createDataSeekableInputStream(new byte[] {-100}).readByte(), -100);
+ assertEquals(createDataSeekableInputStream(new byte[] {-17}).readByte(), -17);
+
+ assertEquals(createDataSeekableInputStream(new byte[] {92}).readUnsignedByte(), 92);
+ assertEquals(createDataSeekableInputStream(new byte[] {-100}).readUnsignedByte(), 156);
+ assertEquals(createDataSeekableInputStream(new byte[] {-17}).readUnsignedByte(), 239);
+ }
+
+ @Test
+ public void testEncodingShort()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {109, 92}).readShort(), 23661);
+ assertEquals(createDataSeekableInputStream(new byte[] {109, -100}).readShort(), -25491);
+ assertEquals(createDataSeekableInputStream(new byte[] {-52, -107}).readShort(), -27188);
+
+ assertEquals(createDataSeekableInputStream(new byte[] {109, -100}).readUnsignedShort(), 40045);
+ assertEquals(createDataSeekableInputStream(new byte[] {-52, -107}).readUnsignedShort(), 38348);
+ }
+
+ @Test
+ public void testEncodingInteger()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {109, 92, 75, 58}).readInt(), 978017389);
+ assertEquals(createDataSeekableInputStream(new byte[] {-16, -60, -120, -1}).readInt(), -7813904);
+ }
+
+ @Test
+ public void testEncodingLong()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {49, -114, -96, -23, -32, -96, -32, 127}).readLong(), 9214541725452766769L);
+ assertEquals(createDataSeekableInputStream(new byte[] {109, 92, 75, 58, 18, 120, -112, -17}).readLong(), -1184314682315678611L);
+ }
+
+ @Test
+ public void testEncodingDouble()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {31, -123, -21, 81, -72, 30, 9, 64}).readDouble(), 3.14);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -8, 127}).readDouble(), Double.NaN);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -16, -1}).readDouble(), Double.NEGATIVE_INFINITY);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -16, 127}).readDouble(), Double.POSITIVE_INFINITY);
+ }
+
+ @Test
+ public void testEncodingFloat()
+ throws Exception
+ {
+ assertEquals(createDataSeekableInputStream(new byte[] {-61, -11, 72, 64}).readFloat(), 3.14f);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -64, 127}).readFloat(), Float.NaN);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -128, -1}).readFloat(), Float.NEGATIVE_INFINITY);
+ assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -128, 127}).readFloat(), Float.POSITIVE_INFINITY);
+ }
+
+ @Test
+ public void testRetainedSize()
+ {
+ int bufferSize = 1024;
+ SeekableInputStream inputStream = new MemorySeekableInputStream(Slices.wrappedBuffer(new byte[] {0, 1}));
+ DataSeekableInputStream input = new DataSeekableInputStream(inputStream, bufferSize);
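+ // expected retention is the wrapper instance itself plus its internal read buffer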
+ assertEquals(input.getRetainedSize(), ClassLayout.parseClass(DataSeekableInputStream.class).instanceSize() + sizeOfByteArray(bufferSize));
+ }
+
+ private static void testDataInput(DataInputTester tester)
+ throws IOException
+ {
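+ // build test data spanning several internal buffers, plus a few extra
+ // bytes so the final value straddles a buffer boundary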
+ int size = (BUFFER_SIZE * 3) + 10;
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(size);
+ try (DataOutputStream output = new DataOutputStream(byteArrayOutputStream)) {
+ for (int i = 0; i < size / tester.valueSize(); i++) {
+ tester.loadValue(output, i);
+ }
+ }
+ byte[] bytes = byteArrayOutputStream.toByteArray();
+
+ testReadForward(tester, bytes);
+ testReadReverse(tester, bytes);
+ testReadOffEnd(tester, bytes);
+ }
+
+ private static void testReadForward(DataInputTester tester, byte[] bytes)
+ throws IOException
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(bytes);
+ for (int i = 0; i < bytes.length / tester.valueSize(); i++) {
+ int position = i * tester.valueSize();
+ assertEquals(input.getPos(), position);
+ tester.verifyValue(input, i);
+ }
+ }
+
+ private static void testReadReverse(DataInputTester tester, byte[] bytes)
+ throws IOException
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(bytes);
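+ // visit values from last to first so every iteration requires a backward seek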
+ for (int i = bytes.length / tester.valueSize() - 1; i >= 0; i--) {
+ int position = i * tester.valueSize();
+ input.seek(position);
+ assertEquals(input.getPos(), position);
+ tester.verifyValue(input, i);
+ }
+ }
+
+ private static void testReadOffEnd(DataInputTester tester, byte[] bytes)
+ throws IOException
+ {
+ DataSeekableInputStream input = createDataSeekableInputStream(bytes);
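+ // stop one byte short of a full value, so reading the next value must hit EOF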
+ ByteStreams.skipFully(input, bytes.length - tester.valueSize() + 1);
+ tester.verifyReadOffEnd(input);
+ }
+
+ private static String getExpectedStringValue(int index, int size)
+ throws IOException
+ {
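+ // repeat the decimal form of the index until exactly 'size' bytes are produced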
+ return ByteSource.concat(cycle(ByteSource.wrap(String.valueOf(index).getBytes(UTF_8)))).slice(0, size).asCharSource(UTF_8).read();
+ }
+
+ protected abstract static class DataInputTester
+ {
+ private final int size;
+
+ public DataInputTester(int size)
+ {
+ this.size = size;
+ }
+
+ public final int valueSize()
+ {
+ return size;
+ }
+
+ public abstract void loadValue(DataOutputStream slice, int valueIndex)
+ throws IOException;
+
+ public abstract void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException;
+
+ public void verifyReadOffEnd(DataSeekableInputStream input)
+ throws IOException
+ {
+ try {
+ verifyValue(input, 1);
+ fail("expected EOFException");
+ }
+ catch (EOFException expected) {
+ }
+ }
+ }
+
+ private abstract static class SkipDataInputTester
+ extends DataInputTester
+ {
+ public SkipDataInputTester(int size)
+ {
+ super(size);
+ }
+
+ @Override
+ public void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
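+ // skip tests never inspect the data, so zero-filled values are sufficient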
+ output.write(new byte[valueSize()]);
+ }
+ }
+
+ private abstract static class StringDataInputTester
+ extends DataInputTester
+ {
+ public StringDataInputTester(int size)
+ {
+ super(size);
+ }
+
+ @Override
+ public final void loadValue(DataOutputStream output, int valueIndex)
+ throws IOException
+ {
+ output.write(getExpectedStringValue(valueIndex, valueSize()).getBytes(UTF_8));
+ }
+
+ @Override
+ public final void verifyValue(DataSeekableInputStream input, int valueIndex)
+ throws IOException
+ {
+ String actual = readActual(input);
+ String expected = getExpectedStringValue(valueIndex, valueSize());
+ assertEquals(actual, expected);
+ }
+
+ protected abstract String readActual(DataSeekableInputStream input)
+ throws IOException;
+ }
+
+ private static DataSeekableInputStream createDataSeekableInputStream(byte[] bytes)
+ {
+ SeekableInputStream inputStream = new MemorySeekableInputStream(Slices.wrappedBuffer(bytes));
+ return new DataSeekableInputStream(inputStream, 16 * 1024);
+ }
+}
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java
similarity index 89%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java
index 070965c6b2dd..153c3d2ee536 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
@@ -23,7 +23,7 @@
import static org.testng.Assert.assertEquals;
-public class TestRcFileDecoderUtils
+public class TestReadWriteUtils
{
@Test
public void testVInt()
@@ -58,10 +58,10 @@ private static void assertVIntRoundTrip(SliceOutput output, long value)
long readValueOld = WritableUtils.readVLong(oldBytes.getInput());
assertEquals(readValueOld, value);
- long readValueNew = RcFileDecoderUtils.readVInt(oldBytes, 0);
+ long readValueNew = ReadWriteUtils.readVInt(oldBytes, 0);
assertEquals(readValueNew, value);
- long readValueNewStream = RcFileDecoderUtils.readVInt(oldBytes.getInput());
+ long readValueNewStream = ReadWriteUtils.readVInt(oldBytes.getInput());
assertEquals(readValueNewStream, value);
}
@@ -73,7 +73,7 @@ private static Slice writeVintOld(SliceOutput output, long value)
Slice vLongOld = Slices.copyOf(output.slice());
output.reset();
- RcFileDecoderUtils.writeVLong(output, value);
+ ReadWriteUtils.writeVLong(output, value);
Slice vLongNew = Slices.copyOf(output.slice());
assertEquals(vLongNew, vLongOld);
@@ -84,7 +84,7 @@ private static Slice writeVintOld(SliceOutput output, long value)
assertEquals(vIntOld, vLongOld);
output.reset();
- RcFileDecoderUtils.writeVInt(output, (int) value);
+ ReadWriteUtils.writeVInt(output, (int) value);
Slice vIntNew = Slices.copyOf(output.slice());
assertEquals(vIntNew, vLongOld);
}
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java
similarity index 98%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java
index dc41c1283175..f8b20a6f76c0 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.compression;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java
similarity index 98%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java
index 4aec0b4b75b9..5df8dfb866b5 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.DiscreteDomain;
@@ -32,7 +32,7 @@
import static com.google.common.collect.Iterables.cycle;
import static com.google.common.collect.Iterables.limit;
-import static io.trino.rcfile.RcFileTester.Format.BINARY;
+import static io.trino.hive.formats.rcfile.RcFileTester.Format.BINARY;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.DateType.DATE;
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java
similarity index 92%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/RcFileTester.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java
index d657066f9cd1..c60224f69d43 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/RcFileTester.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
@@ -19,13 +19,15 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
-import io.airlift.slice.OutputStreamSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.airlift.units.DataSize;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.filesystem.local.LocalInputFile;
import io.trino.hadoop.HadoopNative;
-import io.trino.rcfile.binary.BinaryRcFileEncoding;
-import io.trino.rcfile.text.TextRcFileEncoding;
+import io.trino.hive.formats.compression.CompressionKind;
+import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding;
+import io.trino.hive.formats.rcfile.text.TextRcFileEncoding;
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
@@ -79,10 +81,6 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.compress.BZip2Codec;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.io.compress.Lz4Codec;
-import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
@@ -119,16 +117,15 @@
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
-import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
-import static io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition;
-import static io.trino.rcfile.RcFileTester.Compression.BZIP2;
-import static io.trino.rcfile.RcFileTester.Compression.LZ4;
-import static io.trino.rcfile.RcFileTester.Compression.NONE;
-import static io.trino.rcfile.RcFileTester.Compression.SNAPPY;
-import static io.trino.rcfile.RcFileTester.Compression.ZLIB;
-import static io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION;
-import static io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY;
+import static io.trino.hive.formats.ReadWriteUtils.findFirstSyncPosition;
+import static io.trino.hive.formats.rcfile.RcFileTester.Compression.BZIP2;
+import static io.trino.hive.formats.rcfile.RcFileTester.Compression.GZIP;
+import static io.trino.hive.formats.rcfile.RcFileTester.Compression.LZ4;
+import static io.trino.hive.formats.rcfile.RcFileTester.Compression.NONE;
+import static io.trino.hive.formats.rcfile.RcFileTester.Compression.SNAPPY;
+import static io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION;
+import static io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.DateType.DATE;
@@ -231,43 +228,24 @@ public RcFileEncoding getVectorEncoding()
public enum Compression
{
- BZIP2 {
- @Override
- Optional<String> getCodecName()
- {
- return Optional.of(BZip2Codec.class.getName());
- }
- },
- ZLIB {
- @Override
- Optional<String> getCodecName()
- {
- return Optional.of(GzipCodec.class.getName());
- }
- },
- SNAPPY {
- @Override
- Optional<String> getCodecName()
- {
- return Optional.of(SnappyCodec.class.getName());
- }
- },
- LZ4 {
- @Override
- Optional<String> getCodecName()
- {
- return Optional.of(Lz4Codec.class.getName());
- }
- },
- NONE {
- @Override
- Optional<String> getCodecName()
- {
- return Optional.empty();
- }
- };
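+ // each value now maps directly to a CompressionKind; NONE carries null,
+ // which the constructor below turns into Optional.empty(), e.g.
+ // GZIP.getCompressionKind() => Optional.of(CompressionKind.GZIP)
+ // NONE.getCompressionKind() => Optional.empty()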
+ SNAPPY(CompressionKind.SNAPPY),
+ LZ4(CompressionKind.LZ4),
+ GZIP(CompressionKind.GZIP),
+ ZSTD(CompressionKind.ZSTD),
+ BZIP2(CompressionKind.BZIP2),
+ NONE(null);
+
+ private final Optional<CompressionKind> compressionKind;
+
+ Compression(CompressionKind compressionKind)
+ {
+ this.compressionKind = Optional.ofNullable(compressionKind);
+ }
- abstract Optional<String> getCodecName();
+ public Optional<CompressionKind> getCompressionKind()
+ {
+ return compressionKind;
+ }
}
private boolean structTestsEnabled;
@@ -303,7 +281,7 @@ public static RcFileTester fullTestRcFileReader()
// These compression algorithms were chosen to cover the three different
// cases: uncompressed, aircompressor, and hadoop compression
// We assume that the compression algorithms generally work
- rcFileTester.compressions = ImmutableSet.of(NONE, LZ4, ZLIB, BZIP2);
+ rcFileTester.compressions = ImmutableSet.of(NONE, LZ4, GZIP, BZIP2);
return rcFileTester;
}
@@ -589,21 +567,20 @@ private static List<Long> getSyncPositionsSimple(RcFileReader recordReader, File
long syncFirst = sync.getLong(0);
long syncSecond = sync.getLong(8);
long syncPosition = 0;
- try (RcFileDataSource dataSource = new FileRcFileDataSource(file)) {
- while (syncPosition >= 0) {
- syncPosition = findFirstSyncPosition(dataSource, syncPosition, file.length() - syncPosition, syncFirst, syncSecond);
- if (syncPosition > 0) {
- assertEquals(findFirstSyncPosition(dataSource, syncPosition, 1, syncFirst, syncSecond), syncPosition);
- assertEquals(findFirstSyncPosition(dataSource, syncPosition, 2, syncFirst, syncSecond), syncPosition);
- assertEquals(findFirstSyncPosition(dataSource, syncPosition, 10, syncFirst, syncSecond), syncPosition);
-
- assertEquals(findFirstSyncPosition(dataSource, syncPosition - 1, 1, syncFirst, syncSecond), -1);
- assertEquals(findFirstSyncPosition(dataSource, syncPosition - 2, 2, syncFirst, syncSecond), -1);
- assertEquals(findFirstSyncPosition(dataSource, syncPosition + 1, 1, syncFirst, syncSecond), -1);
-
- syncPositions.add(syncPosition);
- syncPosition++;
- }
+ TrinoInputFile inputFile = new LocalInputFile(file);
+ while (syncPosition >= 0) {
+ syncPosition = findFirstSyncPosition(inputFile, syncPosition, file.length() - syncPosition, syncFirst, syncSecond);
+ if (syncPosition > 0) {
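+ // a search starting exactly at the sync must find it for any window
+ // length, while searches starting just before or just after it (with
+ // windows too short to reach the next sync) must find nothing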
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition, 1, syncFirst, syncSecond), syncPosition);
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition, 2, syncFirst, syncSecond), syncPosition);
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition, 10, syncFirst, syncSecond), syncPosition);
+
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition - 1, 1, syncFirst, syncSecond), -1);
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition - 2, 2, syncFirst, syncSecond), -1);
+ assertEquals(findFirstSyncPosition(inputFile, syncPosition + 1, 1, syncFirst, syncSecond), -1);
+
+ syncPositions.add(syncPosition);
+ syncPosition++;
}
}
return syncPositions;
@@ -612,15 +589,13 @@ private static List<Long> getSyncPositionsSimple(RcFileReader recordReader, File
private static RcFileReader createRcFileReader(TempFile tempFile, Type type, RcFileEncoding encoding)
throws IOException
{
- RcFileDataSource rcFileDataSource = new FileRcFileDataSource(tempFile.getFile());
+ TrinoInputFile rcFileDataSource = new LocalInputFile(tempFile.getFile());
RcFileReader rcFileReader = new RcFileReader(
rcFileDataSource,
encoding,
ImmutableMap.of(0, type),
- new AircompressorCodecFactory(new HadoopCodecFactory(RcFileTester.class.getClassLoader())),
0,
- tempFile.getFile().length(),
- DataSize.of(8, MEGABYTE));
+ tempFile.getFile().length());
assertEquals(rcFileReader.getColumnCount(), 1);
@@ -630,14 +605,11 @@ private static RcFileReader createRcFileReader(TempFile tempFile, Type type, RcF
private static DataSize writeRcFileColumnNew(File outputFile, Format format, Compression compression, Type type, Iterator<?> values, Map<String, String> metadata)
throws Exception
{
- OutputStreamSliceOutput output = new OutputStreamSliceOutput(new FileOutputStream(outputFile));
- AircompressorCodecFactory codecFactory = new AircompressorCodecFactory(new HadoopCodecFactory(RcFileTester.class.getClassLoader()));
RcFileWriter writer = new RcFileWriter(
- output,
+ new FileOutputStream(outputFile),
ImmutableList.of(type),
format.getVectorEncoding(),
- compression.getCodecName(),
- codecFactory,
+ compression.getCompressionKind(),
metadata,
DataSize.of(100, KILOBYTE), // use a smaller size to create more row groups
DataSize.of(200, KILOBYTE),
@@ -651,9 +623,9 @@ private static DataSize writeRcFileColumnNew(File outputFile, Format format, Com
writer.write(new Page(blockBuilder.build()));
writer.close();
- writer.validate(new FileRcFileDataSource(outputFile));
+ writer.validate(new LocalInputFile(outputFile));
- return DataSize.ofBytes(output.size());
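+ // the writer now owns and closes the output stream, so report the final size from the file itself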
+ return DataSize.ofBytes(outputFile.length());
}
private static void writeValue(Type type, BlockBuilder blockBuilder, Object value)
@@ -1059,7 +1031,7 @@ private static RecordWriter createRcFileWriterOld(File outputFile, Compression c
throws IOException
{
JobConf jobConf = new JobConf(false);
- Optional<String> codecName = compression.getCodecName();
+ Optional<String> codecName = compression.getCompressionKind().map(CompressionKind::getHadoopClassName);
codecName.ifPresent(s -> jobConf.set(COMPRESS_CODEC, s));
return new RCFileOutputFormat().getHiveRecordWriter(
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestFullRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java
similarity index 95%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestFullRcFileReader.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java
index 6d9db727169d..bfa0722f2aa3 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestFullRcFileReader.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
public class TestFullRcFileReader
extends AbstractTestRcFileReader
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java
similarity index 95%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReader.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java
index c975c4a7d4b3..2cd920674543 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReader.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
public class TestRcFileReader
extends AbstractTestRcFileReader
diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java
similarity index 93%
rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java
rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java
index e87f4527aaac..66ee6bfedd77 100644
--- a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java
+++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java
@@ -11,15 +11,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package io.trino.rcfile;
+package io.trino.hive.formats.rcfile;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
-import io.airlift.units.DataSize;
-import io.trino.rcfile.binary.BinaryRcFileEncoding;
+import io.trino.filesystem.memory.MemoryInputFile;
+import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding;
import io.trino.spi.block.Block;
import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;
@@ -29,7 +29,6 @@
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.slice.Slices.utf8Slice;
-import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static io.trino.spi.type.SmallintType.SMALLINT;
import static java.util.stream.Collectors.toList;
import static org.testng.Assert.assertEquals;
@@ -236,13 +235,11 @@ private static List<Short> readValues(Slice data, int offset, int length)
}
RcFileReader reader = new RcFileReader(
- new MemoryRcFileDataSource(new RcFileDataSourceId("test"), data),
+ new MemoryInputFile("test", data),
new BinaryRcFileEncoding(DateTimeZone.UTC),
ImmutableMap.of(0, SMALLINT),
- new BogusRcFileCodecFactory(),
offset,
- length,
- DataSize.of(8, MEGABYTE));
+ length);
ImmutableList.Builder<Short> values = ImmutableList.builder();
while (reader.advance() >= 0) {
@@ -290,20 +287,4 @@ public List getRowGroupSegmentOffsets()
return rowGroupSegmentOffsets;
}
}
-
- private static class BogusRcFileCodecFactory
- implements RcFileCodecFactory
- {
- @Override
- public RcFileCompressor createCompressor(String codecName)
- {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public RcFileDecompressor createDecompressor(String codecName)
- {
- throw new UnsupportedOperationException();
- }
- }
}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java
deleted file mode 100644
index 869be0c63973..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import io.airlift.compress.gzip.JdkGzipCodec;
-import io.airlift.compress.lz4.Lz4Codec;
-import io.airlift.compress.lzo.LzoCodec;
-import io.airlift.compress.snappy.SnappyCodec;
-
-import static java.util.Objects.requireNonNull;
-
-public class AircompressorCodecFactory
- implements RcFileCodecFactory
-{
- private static final String SNAPPY_CODEC_NAME = "org.apache.hadoop.io.compress.SnappyCodec";
- private static final String LZO_CODEC_NAME = "com.hadoop.compression.lzo.LzoCodec";
- private static final String LZO_CODEC_NAME_DEPRECATED = "org.apache.hadoop.io.compress.LzoCodec";
- private static final String LZ4_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec";
- private static final String LZ4_HC_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec";
- private static final String GZIP_CODEC_NAME = "org.apache.hadoop.io.compress.GzipCodec";
-
- private final RcFileCodecFactory delegate;
-
- public AircompressorCodecFactory(RcFileCodecFactory delegate)
- {
- this.delegate = requireNonNull(delegate, "delegate is null");
- }
-
- @Override
- public RcFileCompressor createCompressor(String codecName)
- {
- if (SNAPPY_CODEC_NAME.equals(codecName)) {
- return new AircompressorCompressor(new SnappyCodec());
- }
- if (LZO_CODEC_NAME.equals(codecName) || LZO_CODEC_NAME_DEPRECATED.equals(codecName)) {
- return new AircompressorCompressor(new LzoCodec());
- }
- if (LZ4_CODEC_NAME.equals(codecName)) {
- return new AircompressorCompressor(new Lz4Codec());
- }
- if (GZIP_CODEC_NAME.equals(codecName)) {
- return new AircompressorCompressor(new JdkGzipCodec());
- }
- return delegate.createCompressor(codecName);
- }
-
- @Override
- public RcFileDecompressor createDecompressor(String codecName)
- {
- if (SNAPPY_CODEC_NAME.equals(codecName)) {
- return new AircompressorDecompressor(new SnappyCodec());
- }
- if (LZO_CODEC_NAME.equals(codecName) || LZO_CODEC_NAME_DEPRECATED.equals(codecName)) {
- return new AircompressorDecompressor(new LzoCodec());
- }
- if (LZ4_CODEC_NAME.equals(codecName) || LZ4_HC_CODEC_NAME.equals(codecName)) {
- return new AircompressorDecompressor(new Lz4Codec());
- }
- if (GZIP_CODEC_NAME.equals(codecName)) {
- return new AircompressorDecompressor(new JdkGzipCodec());
- }
- return delegate.createDecompressor(codecName);
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCompressor.java
deleted file mode 100644
index 849fae82baf4..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCompressor.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionOutputStream;
-import org.apache.hadoop.io.compress.Compressor;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.function.Supplier;
-
-import static java.util.Objects.requireNonNull;
-
-public class AircompressorCompressor
- implements RcFileCompressor
-{
- private final CompressionCodec codec;
-
- public AircompressorCompressor(CompressionCodec codec)
- {
- this.codec = requireNonNull(codec, "codec is null");
- }
-
- @Override
- public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize)
- {
- return new AircompressorCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get();
- }
-
- private static class AircompressorCompressedSliceOutputSupplier
- implements Supplier<CompressedSliceOutput>
- {
- private final CompressionCodec codec;
- private final Compressor compressor;
- private final ChunkedSliceOutput compressedOutput;
-
- public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
- {
- this.codec = requireNonNull(codec, "codec is null");
- this.compressor = codec.createCompressor();
- this.compressedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
- }
-
- @Override
- public CompressedSliceOutput get()
- {
- try {
- compressor.reset();
- compressedOutput.reset();
- CompressionOutputStream compressionStream = codec.createOutputStream(compressedOutput, compressor);
- return new CompressedSliceOutput(compressionStream, compressedOutput, this, () -> {});
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorDecompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorDecompressor.java
deleted file mode 100644
index b7759c43a6ea..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorDecompressor.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import io.airlift.slice.Slice;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionInputStream;
-
-import java.io.IOException;
-
-import static java.util.Objects.requireNonNull;
-
-public class AircompressorDecompressor
- implements RcFileDecompressor
-{
- private final CompressionCodec codec;
-
- public AircompressorDecompressor(CompressionCodec codec)
- {
- this.codec = requireNonNull(codec, "codec is null");
- }
-
- @Override
- public void decompress(Slice compressed, Slice uncompressed)
- throws RcFileCorruptionException
- {
- try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput())) {
- uncompressed.setBytes(0, decompressorStream, uncompressed.length());
- }
- catch (IndexOutOfBoundsException | IOException e) {
- throw new RcFileCorruptionException(e, "Compressed stream is truncated");
- }
- }
-
- @Override
- public void destroy()
- {
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCodecFactory.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCodecFactory.java
deleted file mode 100644
index f33c91ef4a1a..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCodecFactory.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.io.compress.CompressionCodec;
-
-import java.lang.reflect.Constructor;
-
-import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
-
-public class HadoopCodecFactory
- implements RcFileCodecFactory
-{
- private final ClassLoader classLoader;
-
- public HadoopCodecFactory(ClassLoader classLoader)
- {
- this.classLoader = classLoader;
- }
-
- @Override
- public RcFileCompressor createCompressor(String codecName)
- {
- CompressionCodec codec = createCompressionCodec(codecName);
- return new HadoopCompressor(codec);
- }
-
- @Override
- public RcFileDecompressor createDecompressor(String codecName)
- {
- CompressionCodec codec = createCompressionCodec(codecName);
- return new HadoopDecompressor(codec);
- }
-
- private CompressionCodec createCompressionCodec(String codecName)
- {
- try {
- Class<? extends CompressionCodec> codecClass = classLoader.loadClass(codecName).asSubclass(CompressionCodec.class);
- Constructor<? extends CompressionCodec> constructor = codecClass.getDeclaredConstructor();
- constructor.setAccessible(true);
- CompressionCodec codec = constructor.newInstance();
- if (codec instanceof Configurable) {
- // Hadoop is crazy... you have to give codecs an empty configuration or they throw NPEs
- // but you need to make sure the configuration doesn't "load" defaults or it spends
- // forever loading XML with no useful information
- ((Configurable) codec).setConf(newEmptyConfiguration());
- }
- return codec;
- }
- catch (ReflectiveOperationException e) {
- throw new IllegalArgumentException("Unknown codec: " + codecName, e);
- }
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCompressor.java
deleted file mode 100644
index 9c4ff29b9fda..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCompressor.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import org.apache.hadoop.io.compress.CodecPool;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionOutputStream;
-import org.apache.hadoop.io.compress.Compressor;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.function.Supplier;
-
-import static java.util.Objects.requireNonNull;
-
-public class HadoopCompressor
- implements RcFileCompressor
-{
- private final CompressionCodec codec;
-
- public HadoopCompressor(CompressionCodec codec)
- {
- this.codec = requireNonNull(codec, "codec is null");
- }
-
- @Override
- public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize)
- {
- return new HadoopCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get();
- }
-
- private static class HadoopCompressedSliceOutputSupplier
- implements Supplier<CompressedSliceOutput>
- {
- private final CompressionCodec codec;
- private final Compressor compressor;
- private final ChunkedSliceOutput bufferedOutput;
-
- public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
- {
- this.codec = requireNonNull(codec, "codec is null");
- this.compressor = CodecPool.getCompressor(requireNonNull(codec, "codec is null"));
- this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
- }
-
- @Override
- public CompressedSliceOutput get()
- {
- try {
- compressor.reset();
- bufferedOutput.reset();
- CompressionOutputStream compressionStream = codec.createOutputStream(bufferedOutput, compressor);
- return new CompressedSliceOutput(compressionStream, bufferedOutput, this, () -> CodecPool.returnCompressor(compressor));
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopDecompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopDecompressor.java
deleted file mode 100644
index 99de02061158..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopDecompressor.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import io.airlift.slice.Slice;
-import org.apache.hadoop.io.compress.CodecPool;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionInputStream;
-import org.apache.hadoop.io.compress.Decompressor;
-
-import java.io.IOException;
-
-import static com.google.common.base.Preconditions.checkState;
-import static java.util.Objects.requireNonNull;
-
-public class HadoopDecompressor
- implements RcFileDecompressor
-{
- private final CompressionCodec codec;
- private final Decompressor decompressor;
- private boolean destroyed;
-
- public HadoopDecompressor(CompressionCodec codec)
- {
- this.codec = requireNonNull(codec, "codec is null");
- decompressor = CodecPool.getDecompressor(codec);
- }
-
- @Override
- public void decompress(Slice compressed, Slice uncompressed)
- throws RcFileCorruptionException
- {
- checkState(!destroyed, "Codec has been destroyed");
- decompressor.reset();
- try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput(), decompressor)) {
- uncompressed.setBytes(0, decompressorStream, uncompressed.length());
- }
- catch (IndexOutOfBoundsException | IOException e) {
- throw new RcFileCorruptionException(e, "Compressed stream is truncated");
- }
- }
-
- @Override
- public void destroy()
- {
- if (destroyed) {
- return;
- }
- destroyed = true;
- CodecPool.returnDecompressor(decompressor);
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/NoneCompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/NoneCompressor.java
deleted file mode 100644
index c93a2d1e5ee1..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/NoneCompressor.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import java.util.function.Supplier;
-
-class NoneCompressor
- implements RcFileCompressor
-{
- @Override
- public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize)
- {
- return new NoneCompressedSliceOutputSupplier(minChunkSize, maxChunkSize).get();
- }
-
- private static class NoneCompressedSliceOutputSupplier
- implements Supplier<CompressedSliceOutput>
- {
- private final ChunkedSliceOutput chunkedSliceOutput;
-
- private NoneCompressedSliceOutputSupplier(int minChunkSize, int maxChunkSize)
- {
- chunkedSliceOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
- }
-
- @Override
- public CompressedSliceOutput get()
- {
- chunkedSliceOutput.reset();
- return new CompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {});
- }
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCodecFactory.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCodecFactory.java
deleted file mode 100644
index 059d4403e7c7..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCodecFactory.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-public interface RcFileCodecFactory
-{
- RcFileCompressor createCompressor(String codecName);
-
- RcFileDecompressor createDecompressor(String codecName);
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCompressor.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCompressor.java
deleted file mode 100644
index f4fb9cc6d1bc..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCompressor.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import io.airlift.slice.Slice;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.List;
-import java.util.function.Supplier;
-
-import static com.google.common.base.Preconditions.checkState;
-import static java.util.Objects.requireNonNull;
-
-public interface RcFileCompressor
-{
- CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize);
-
- // This specialized SliceOutput has direct access to the buffered output slices to
- // report buffer sizes and to get the final output. Additionally, a new
- // CompressedSliceOutput can be created that reuses the underlying output
- // buffer
- final class CompressedSliceOutput
- extends BufferedOutputStreamSliceOutput
- {
- private final ChunkedSliceOutput bufferedOutput;
- private final Supplier<CompressedSliceOutput> resetFactory;
- private final Runnable onDestroy;
- private boolean closed;
- private boolean destroyed;
-
- /**
- * @param compressionStream the compressed output stream to delegate to
- * @param bufferedOutput the output for the compressionStream
- * @param resetFactory the function to create a new CompressedSliceOutput that reuses the bufferedOutput
- * @param onDestroy used to cleanup the compression when done
- */
- public CompressedSliceOutput(OutputStream compressionStream, ChunkedSliceOutput bufferedOutput, Supplier<CompressedSliceOutput> resetFactory, Runnable onDestroy)
- {
- super(compressionStream);
- this.bufferedOutput = requireNonNull(bufferedOutput, "bufferedOutput is null");
- this.resetFactory = requireNonNull(resetFactory, "resetFactory is null");
- this.onDestroy = requireNonNull(onDestroy, "onDestroy is null");
- }
-
- @Override
- public long getRetainedSize()
- {
- return super.getRetainedSize() + bufferedOutput.getRetainedSize();
- }
-
- public int getCompressedSize()
- {
- checkState(closed, "Stream has not been closed");
- checkState(!destroyed, "Stream has been destroyed");
- return bufferedOutput.size();
- }
-
- public List<Slice> getCompressedSlices()
- {
- checkState(closed, "Stream has not been closed");
- checkState(!destroyed, "Stream has been destroyed");
- return bufferedOutput.getSlices();
- }
-
- public CompressedSliceOutput createRecycledCompressedSliceOutput()
- {
- checkState(closed, "Stream has not been closed");
- checkState(!destroyed, "Stream has been destroyed");
- destroyed = true;
- return resetFactory.get();
- }
-
- @Override
- public void close()
- throws IOException
- {
- if (!closed) {
- closed = true;
- super.close();
- }
- }
-
- public void destroy()
- throws IOException
- {
- if (!destroyed) {
- destroyed = true;
- try {
- close();
- }
- finally {
- onDestroy.run();
- }
- }
- }
- }
-}
diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSourceId.java b/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSourceId.java
deleted file mode 100644
index b15257685184..000000000000
--- a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSourceId.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.rcfile;
-
-import java.util.Objects;
-
-import static java.util.Objects.requireNonNull;
-
-public final class RcFileDataSourceId
-{
- private final String id;
-
- public RcFileDataSourceId(String id)
- {
- this.id = requireNonNull(id, "id is null");
- }
-
- @Override
- public boolean equals(Object o)
- {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
- RcFileDataSourceId that = (RcFileDataSourceId) o;
- return Objects.equals(id, that.id);
- }
-
- @Override
- public int hashCode()
- {
- return Objects.hash(id);
- }
-
- @Override
- public String toString()
- {
- return id;
- }
-}
diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml
index 0912ff40b426..2511b125199e 100644
--- a/plugin/trino-hive/pom.xml
+++ b/plugin/trino-hive/pom.xml
@@ -49,27 +49,27 @@
<groupId>io.trino</groupId>
- <artifactId>trino-memory-context</artifactId>
+ <artifactId>trino-hive-formats</artifactId>
<groupId>io.trino</groupId>
- <artifactId>trino-orc</artifactId>
+ <artifactId>trino-memory-context</artifactId>
<groupId>io.trino</groupId>
- <artifactId>trino-parquet</artifactId>
+ <artifactId>trino-orc</artifactId>
<groupId>io.trino</groupId>
- <artifactId>trino-plugin-toolkit</artifactId>
+ <artifactId>trino-parquet</artifactId>
<groupId>io.trino</groupId>
- <artifactId>trino-rcfile</artifactId>
+ <artifactId>trino-plugin-toolkit</artifactId>
@@ -260,6 +260,11 @@
<artifactId>alluxio-shaded-client</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.iceberg</groupId>
+ <artifactId>iceberg-api</artifactId>
+ </dependency>
+
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java
index 7a9aab74fd9a..eeda7a40a137 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java
@@ -15,11 +15,6 @@
import io.trino.orc.metadata.CompressionKind;
import org.apache.avro.file.DataFileConstants;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.GzipCodec;
-import org.apache.hadoop.io.compress.Lz4Codec;
-import org.apache.hadoop.io.compress.SnappyCodec;
-import org.apache.hadoop.io.compress.ZStandardCodec;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import java.util.Optional;
@@ -29,34 +24,34 @@
public enum HiveCompressionCodec
{
NONE(null, CompressionKind.NONE, CompressionCodecName.UNCOMPRESSED, DataFileConstants.NULL_CODEC),
- SNAPPY(SnappyCodec.class, CompressionKind.SNAPPY, CompressionCodecName.SNAPPY, DataFileConstants.SNAPPY_CODEC),
- LZ4(Lz4Codec.class, CompressionKind.LZ4, CompressionCodecName.LZ4, null),
- ZSTD(ZStandardCodec.class, CompressionKind.ZSTD, CompressionCodecName.ZSTD, DataFileConstants.ZSTANDARD_CODEC),
+ SNAPPY(io.trino.hive.formats.compression.CompressionKind.SNAPPY, CompressionKind.SNAPPY, CompressionCodecName.SNAPPY, DataFileConstants.SNAPPY_CODEC),
+ LZ4(io.trino.hive.formats.compression.CompressionKind.LZ4, CompressionKind.LZ4, CompressionCodecName.LZ4, null),
+ ZSTD(io.trino.hive.formats.compression.CompressionKind.ZSTD, CompressionKind.ZSTD, CompressionCodecName.ZSTD, DataFileConstants.ZSTANDARD_CODEC),
// Using DEFLATE for GZIP for Avro for now so Avro files can be written in default configuration
// TODO(https://github.com/trinodb/trino/issues/12580) change GZIP to be unsupported for Avro when we change Trino default compression to be storage format aware
- GZIP(GzipCodec.class, CompressionKind.ZLIB, CompressionCodecName.GZIP, DataFileConstants.DEFLATE_CODEC);
+ GZIP(io.trino.hive.formats.compression.CompressionKind.GZIP, CompressionKind.ZLIB, CompressionCodecName.GZIP, DataFileConstants.DEFLATE_CODEC);
- private final Optional<Class<? extends CompressionCodec>> codec;
+ private final Optional<io.trino.hive.formats.compression.CompressionKind> hiveCompressionKind;
private final CompressionKind orcCompressionKind;
private final CompressionCodecName parquetCompressionCodec;
private final Optional<String> avroCompressionCodec;
HiveCompressionCodec(
- Class<? extends CompressionCodec> codec,
+ io.trino.hive.formats.compression.CompressionKind hiveCompressionKind,
CompressionKind orcCompressionKind,
CompressionCodecName parquetCompressionCodec,
String avroCompressionCodec)
{
- this.codec = Optional.ofNullable(codec);
+ this.hiveCompressionKind = Optional.ofNullable(hiveCompressionKind);
this.orcCompressionKind = requireNonNull(orcCompressionKind, "orcCompressionKind is null");
this.parquetCompressionCodec = requireNonNull(parquetCompressionCodec, "parquetCompressionCodec is null");
this.avroCompressionCodec = Optional.ofNullable(avroCompressionCodec);
}
- public Optional<Class<? extends CompressionCodec>> getCodec()
+ public Optional<io.trino.hive.formats.compression.CompressionKind> getHiveCompressionKind()
{
- return codec;
+ return hiveCompressionKind;
}
public CompressionKind getOrcCompressionKind()
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java
new file mode 100644
index 000000000000..2e642cba3db7
--- /dev/null
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.hive;
+
+import io.trino.filesystem.TrinoInput;
+import io.trino.filesystem.TrinoInputFile;
+import org.apache.iceberg.io.SeekableInputStream;
+
+import java.io.IOException;
+
+import static java.util.Objects.requireNonNull;
+
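+/**
+ * Decorates a {@link TrinoInputFile} so every read reports its size and elapsed
+ * time to {@link FileFormatDataSourceStats}, delegating all other behavior.
+ */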
+public class MonitoredTrinoInputFile
+ implements TrinoInputFile
+{
+ private final FileFormatDataSourceStats stats;
+ private final TrinoInputFile delegate;
+
+ public MonitoredTrinoInputFile(FileFormatDataSourceStats stats, TrinoInputFile delegate)
+ {
+ this.stats = requireNonNull(stats, "stats is null");
+ this.delegate = requireNonNull(delegate, "delegate is null");
+ }
+
+ @Override
+ public TrinoInput newInput()
+ throws IOException
+ {
+ return new MonitoredTrinoInput(stats, delegate.newInput());
+ }
+
+ @Override
+ public long length()
+ throws IOException
+ {
+ return delegate.length();
+ }
+
+ @Override
+ public long modificationTime()
+ throws IOException
+ {
+ return delegate.modificationTime();
+ }
+
+ @Override
+ public boolean exists()
+ throws IOException
+ {
+ return delegate.exists();
+ }
+
+ @Override
+ public String location()
+ {
+ return delegate.location();
+ }
+
+ @Override
+ public String toString()
+ {
+ return delegate.toString();
+ }
+
+ private static final class MonitoredTrinoInput
+ implements TrinoInput
+ {
+ private final FileFormatDataSourceStats stats;
+ private final TrinoInput delegate;
+
+ public MonitoredTrinoInput(FileFormatDataSourceStats stats, TrinoInput delegate)
+ {
+ this.stats = requireNonNull(stats, "stats is null");
+ this.delegate = requireNonNull(delegate, "delegate is null");
+ }
+
+ @Override
+ public SeekableInputStream inputStream()
+ {
+ return new MonitoredSeekableInputStream(stats, delegate.inputStream());
+ }
+
+ @Override
+ public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
+ throws IOException
+ {
+ long readStart = System.nanoTime();
+ delegate.readFully(position, buffer, bufferOffset, bufferLength);
+ stats.readDataBytesPerSecond(bufferLength, System.nanoTime() - readStart);
+ }
+
+ @Override
+ public int readTail(byte[] buffer, int bufferOffset, int bufferLength)
+ throws IOException
+ {
+ long readStart = System.nanoTime();
+ int size = delegate.readTail(buffer, bufferOffset, bufferLength);
+ stats.readDataBytesPerSecond(size, System.nanoTime() - readStart);
+ return size;
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ delegate.close();
+ }
+ }
+
+ private static final class MonitoredSeekableInputStream
+ extends SeekableInputStream
+ {
+ private final FileFormatDataSourceStats stats;
+ private final SeekableInputStream delegate;
+
+ public MonitoredSeekableInputStream(FileFormatDataSourceStats stats, SeekableInputStream delegate)
+ {
+ this.stats = requireNonNull(stats, "stats is null");
+ this.delegate = requireNonNull(delegate, "delegate is null");
+ }
+
+ @Override
+ public long getPos()
+ throws IOException
+ {
+ return delegate.getPos();
+ }
+
+ @Override
+ public void seek(long newPos)
+ throws IOException
+ {
+ delegate.seek(newPos);
+ }
+
+ @Override
+ public int read()
+ throws IOException
+ {
+ long readStart = System.nanoTime();
+ int value = delegate.read();
+ stats.readDataBytesPerSecond(1, System.nanoTime() - readStart);
+ return value;
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len)
+ throws IOException
+ {
+ long readStart = System.nanoTime();
+ int size = delegate.read(b, off, len);
+ stats.readDataBytesPerSecond(size, System.nanoTime() - readStart);
+ return size;
+ }
+
+ @Override
+ public long skip(long n)
+ throws IOException
+ {
+ long readStart = System.nanoTime();
+ long size = delegate.skip(n);
+ stats.readDataBytesPerSecond(size, System.nanoTime() - readStart);
+ return size;
+ }
+
+ @Override
+ public void close()
+ throws IOException
+ {
+ delegate.close();
+ }
+ }
+}
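
This decorator is what RcFilePageSourceFactory (later in this patch) wraps around the files it opens, so positioned reads, tail reads, and stream reads all feed the shared FileFormatDataSourceStats bean. A minimal usage sketch, assuming a caller that already has a TrinoFileSystem and a stats instance (the helper name and the 8 KB tail size are illustrative, not part of the patch):

    import io.trino.filesystem.TrinoFileSystem;
    import io.trino.filesystem.TrinoInput;
    import io.trino.filesystem.TrinoInputFile;
    import io.trino.plugin.hive.FileFormatDataSourceStats;
    import io.trino.plugin.hive.MonitoredTrinoInputFile;

    import java.io.IOException;
    import java.util.Arrays;

    final class MonitoredReadSketch
    {
        private MonitoredReadSketch() {}

        // Opens 'location' through the monitoring decorator, so the tail read
        // below is timed and reported via stats.readDataBytesPerSecond(...)
        static byte[] readFileTail(TrinoFileSystem fileSystem, String location, FileFormatDataSourceStats stats)
                throws IOException
        {
            TrinoInputFile inputFile = new MonitoredTrinoInputFile(stats, fileSystem.newInputFile(location));
            byte[] buffer = new byte[8 * 1024];
            try (TrinoInput input = inputFile.newInput()) {
                // readTail clamps to the file length and returns the bytes actually read
                int read = input.readTail(buffer, 0, buffer.length);
                return Arrays.copyOf(buffer, read);
            }
        }
    }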
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java
index 50ec569d9024..7f27b70b8204 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java
@@ -15,12 +15,11 @@
import com.google.common.collect.ImmutableList;
import com.google.common.io.CountingOutputStream;
-import io.airlift.slice.OutputStreamSliceOutput;
-import io.trino.rcfile.AircompressorCodecFactory;
-import io.trino.rcfile.HadoopCodecFactory;
-import io.trino.rcfile.RcFileDataSource;
-import io.trino.rcfile.RcFileEncoding;
-import io.trino.rcfile.RcFileWriter;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.hive.formats.compression.CompressionKind;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.RcFileWriter;
+import io.trino.memory.context.AggregatedMemoryContext;
import io.trino.spi.Page;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
@@ -54,32 +53,34 @@ public class RcFileFileWriter
private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean();
private final CountingOutputStream outputStream;
+ private final AggregatedMemoryContext outputStreamMemoryContext;
private final RcFileWriter rcFileWriter;
private final Closeable rollbackAction;
private final int[] fileInputColumnIndexes;
private final List<Block> nullBlocks;
- private final Optional<Supplier<RcFileDataSource>> validationInputFactory;
+ private final Optional<Supplier<TrinoInputFile>> validationInputFactory;
private long validationCpuNanos;
public RcFileFileWriter(
OutputStream outputStream,
+ AggregatedMemoryContext outputStreamMemoryContext,
Closeable rollbackAction,
RcFileEncoding rcFileEncoding,
List<Type> fileColumnTypes,
- Optional<String> codecName,
+ Optional<CompressionKind> compressionKind,
int[] fileInputColumnIndexes,
Map<String, String> metadata,
- Optional<Supplier<RcFileDataSource>> validationInputFactory)
+ Optional<Supplier<TrinoInputFile>> validationInputFactory)
throws IOException
{
this.outputStream = new CountingOutputStream(outputStream);
+ this.outputStreamMemoryContext = outputStreamMemoryContext;
rcFileWriter = new RcFileWriter(
- new OutputStreamSliceOutput(this.outputStream),
+ this.outputStream,
fileColumnTypes,
rcFileEncoding,
- codecName,
- new AircompressorCodecFactory(new HadoopCodecFactory(getClass().getClassLoader())),
+ compressionKind,
metadata,
validationInputFactory.isPresent());
this.rollbackAction = requireNonNull(rollbackAction, "rollbackAction is null");
@@ -105,7 +106,7 @@ public long getWrittenBytes()
@Override
public long getMemoryUsage()
{
- return INSTANCE_SIZE + rcFileWriter.getRetainedSizeInBytes();
+ return INSTANCE_SIZE + rcFileWriter.getRetainedSizeInBytes() + outputStreamMemoryContext.getBytes();
}
@Override
@@ -148,11 +149,10 @@ public Closeable commit()
if (validationInputFactory.isPresent()) {
try {
- try (RcFileDataSource input = validationInputFactory.get().get()) {
- long startThreadCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime();
- rcFileWriter.validate(input);
- validationCpuNanos += THREAD_MX_BEAN.getCurrentThreadCpuTime() - startThreadCpuTime;
- }
+ TrinoInputFile inputFile = validationInputFactory.get().get();
+ long startThreadCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime();
+ rcFileWriter.validate(inputFile);
+ validationCpuNanos += THREAD_MX_BEAN.getCurrentThreadCpuTime() - startThreadCpuTime;
}
catch (IOException | UncheckedIOException e) {
throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
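
The new outputStreamMemoryContext parameter exists so that memory buffered inside the output stream is charged to the writer: getMemoryUsage() now adds outputStreamMemoryContext.getBytes() on top of the writer's own retained size. A sketch of the accounting mechanics under that assumption (the allocation tag and buffer size are hypothetical):

    import io.trino.memory.context.AggregatedMemoryContext;
    import io.trino.memory.context.LocalMemoryContext;

    import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;

    final class MemoryAccountingSketch
    {
        private MemoryAccountingSketch() {}

        static long trackedBytes()
        {
            // The factory creates one aggregated context per file and hands it to
            // both the output file and the writer (see RcFileFileWriterFactory below)
            AggregatedMemoryContext context = newSimpleAggregatedMemoryContext();
            // A child context, as an output stream might use for its write buffer
            LocalMemoryContext bufferContext = context.newLocalMemoryContext("buffer");
            bufferContext.setBytes(64 * 1024);
            // Aggregated total that getMemoryUsage() folds into its result
            return context.getBytes(); // 65536
        }
    }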
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java
index 61672051aebe..9dcc2c7a890a 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java
@@ -14,18 +14,20 @@
package io.trino.plugin.hive;
import com.google.common.collect.ImmutableMap;
+import io.trino.filesystem.TrinoFileSystem;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
import io.trino.hdfs.HdfsEnvironment;
+import io.trino.hive.formats.compression.CompressionKind;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding;
+import io.trino.memory.context.AggregatedMemoryContext;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.metastore.StorageFormat;
-import io.trino.plugin.hive.rcfile.HdfsRcFileDataSource;
-import io.trino.rcfile.RcFileDataSource;
-import io.trino.rcfile.RcFileEncoding;
-import io.trino.rcfile.binary.BinaryRcFileEncoding;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -34,7 +36,6 @@
import javax.inject.Inject;
import java.io.Closeable;
-import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Optional;
@@ -42,8 +43,8 @@
import java.util.Properties;
import java.util.function.Supplier;
+import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR;
-import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED;
import static io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME;
import static io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME;
import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision;
@@ -64,31 +65,27 @@ public class RcFileFileWriterFactory
private final HdfsEnvironment hdfsEnvironment;
private final TypeManager typeManager;
private final NodeVersion nodeVersion;
- private final FileFormatDataSourceStats stats;
@Inject
public RcFileFileWriterFactory(
HdfsEnvironment hdfsEnvironment,
TypeManager typeManager,
NodeVersion nodeVersion,
- HiveConfig hiveConfig,
- FileFormatDataSourceStats stats)
+ HiveConfig hiveConfig)
{
- this(hdfsEnvironment, typeManager, nodeVersion, hiveConfig.getRcfileDateTimeZone(), stats);
+ this(hdfsEnvironment, typeManager, nodeVersion, hiveConfig.getRcfileDateTimeZone());
}
public RcFileFileWriterFactory(
HdfsEnvironment hdfsEnvironment,
TypeManager typeManager,
NodeVersion nodeVersion,
- DateTimeZone timeZone,
- FileFormatDataSourceStats stats)
+ DateTimeZone timeZone)
{
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
this.timeZone = requireNonNull(timeZone, "timeZone is null");
- this.stats = requireNonNull(stats, "stats is null");
}
@Override
@@ -119,7 +116,8 @@ else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) {
return Optional.empty();
}
- Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
+ Optional<CompressionKind> compressionKind = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC))
+ .map(CompressionKind::fromHadoopClassName);
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
@@ -133,33 +131,24 @@ else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) {
.toArray();
try {
- FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
- OutputStream outputStream = fileSystem.create(path, false);
+ TrinoFileSystem fileSystem = new HdfsFileSystemFactory(hdfsEnvironment).create(session.getIdentity());
+ AggregatedMemoryContext outputStreamMemoryContext = newSimpleAggregatedMemoryContext();
+ OutputStream outputStream = fileSystem.newOutputFile(path.toString()).create(outputStreamMemoryContext);
- Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
+ Optional<Supplier<TrinoInputFile>> validationInputFactory = Optional.empty();
if (isRcfileOptimizedWriterValidate(session)) {
- validationInputFactory = Optional.of(() -> {
- try {
- return new HdfsRcFileDataSource(
- path.toString(),
- fileSystem.open(path),
- fileSystem.getFileStatus(path).getLen(),
- stats);
- }
- catch (IOException e) {
- throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
- }
- });
+ validationInputFactory = Optional.of(() -> fileSystem.newInputFile(path.toString()));
}
- Closeable rollbackAction = () -> fileSystem.delete(path, false);
+ Closeable rollbackAction = () -> fileSystem.deleteFile(path.toString());
return Optional.of(new RcFileFileWriter(
outputStream,
+ outputStreamMemoryContext,
rollbackAction,
rcFileEncoding,
fileColumnTypes,
- codecName,
+ compressionKind,
fileInputColumnIndexes,
ImmutableMap.<String, String>builder()
.put(PRESTO_VERSION_NAME, nodeVersion.toString())
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java
deleted file mode 100644
index 6def5227717e..000000000000
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.plugin.hive.rcfile;
-
-import io.trino.plugin.hive.FileFormatDataSourceStats;
-import io.trino.rcfile.RcFileDataSource;
-import io.trino.rcfile.RcFileDataSourceId;
-import org.apache.hadoop.fs.FSDataInputStream;
-
-import java.io.IOException;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static java.util.Objects.requireNonNull;
-
-public class HdfsRcFileDataSource
- implements RcFileDataSource
-{
- private final FSDataInputStream inputStream;
- private final String path;
- private final long size;
- private final FileFormatDataSourceStats stats;
- private long readTimeNanos;
- private long readBytes;
-
- public HdfsRcFileDataSource(String path, FSDataInputStream inputStream, long size, FileFormatDataSourceStats stats)
- {
- this.path = requireNonNull(path, "path is null");
- this.inputStream = requireNonNull(inputStream, "inputStream is null");
- this.size = size;
- checkArgument(size >= 0, "size is negative");
- this.stats = requireNonNull(stats, "stats is null");
- }
-
- @Override
- public RcFileDataSourceId getId()
- {
- return new RcFileDataSourceId(path);
- }
-
- @Override
- public void close()
- throws IOException
- {
- inputStream.close();
- }
-
- @Override
- public long getReadBytes()
- {
- return readBytes;
- }
-
- @Override
- public long getReadTimeNanos()
- {
- return readTimeNanos;
- }
-
- @Override
- public long getSize()
- {
- return size;
- }
-
- @Override
- public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
- throws IOException
- {
- long start = System.nanoTime();
-
- inputStream.readFully(position, buffer, bufferOffset, bufferLength);
-
- long readDuration = System.nanoTime() - start;
- stats.readDataBytesPerSecond(bufferLength, readDuration);
-
- readTimeNanos += readDuration;
- readBytes += bufferLength;
- }
-
- @Override
- public String toString()
- {
- return path;
- }
-}
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java
index 918ff1942236..192a71fce9d6 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java
@@ -15,10 +15,10 @@
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.RcFileReader;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HiveType;
-import io.trino.rcfile.RcFileCorruptionException;
-import io.trino.rcfile.RcFileReader;
import io.trino.spi.Page;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
@@ -144,11 +144,11 @@ public Page getNextPage()
}
catch (RcFileCorruptionException e) {
closeAllSuppress(e, this);
- throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e);
+ throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getFileLocation()), e);
}
catch (IOException | RuntimeException e) {
closeAllSuppress(e, this);
- throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e);
+ throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getFileLocation()), e);
}
}
@@ -212,10 +212,10 @@ public Block load()
block = rcFileReader.readBlock(columnIndex);
}
catch (RcFileCorruptionException e) {
- throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e);
+ throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getFileLocation()), e);
}
catch (IOException | RuntimeException e) {
- throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e);
+ throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getFileLocation()), e);
}
loaded = true;
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
index 90af2d161ae2..162a8769b0ab 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
@@ -18,27 +18,27 @@
import io.airlift.slice.Slices;
import io.airlift.units.DataSize;
import io.airlift.units.DataSize.Unit;
-import io.trino.hdfs.FSDataInputStreamTail;
+import io.trino.filesystem.TrinoFileSystem;
+import io.trino.filesystem.TrinoInput;
+import io.trino.filesystem.TrinoInputFile;
+import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
+import io.trino.filesystem.memory.MemoryInputFile;
import io.trino.hdfs.HdfsEnvironment;
+import io.trino.hive.formats.rcfile.RcFileCorruptionException;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.RcFileReader;
+import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding;
+import io.trino.hive.formats.rcfile.text.TextRcFileEncoding;
import io.trino.plugin.hive.AcidInfo;
import io.trino.plugin.hive.FileFormatDataSourceStats;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HiveConfig;
import io.trino.plugin.hive.HivePageSourceFactory;
import io.trino.plugin.hive.HiveTimestampPrecision;
+import io.trino.plugin.hive.MonitoredTrinoInputFile;
import io.trino.plugin.hive.ReaderColumns;
import io.trino.plugin.hive.ReaderPageSource;
import io.trino.plugin.hive.acid.AcidTransaction;
-import io.trino.rcfile.AircompressorCodecFactory;
-import io.trino.rcfile.HadoopCodecFactory;
-import io.trino.rcfile.MemoryRcFileDataSource;
-import io.trino.rcfile.RcFileCorruptionException;
-import io.trino.rcfile.RcFileDataSource;
-import io.trino.rcfile.RcFileDataSourceId;
-import io.trino.rcfile.RcFileEncoding;
-import io.trino.rcfile.RcFileReader;
-import io.trino.rcfile.binary.BinaryRcFileEncoding;
-import io.trino.rcfile.text.TextRcFileEncoding;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.ConnectorSession;
@@ -47,8 +47,6 @@
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.joda.time.DateTimeZone;
@@ -56,7 +54,7 @@
import javax.inject.Inject;
import java.io.FileNotFoundException;
-import java.io.IOException;
+import java.io.InputStream;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
@@ -65,6 +63,8 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.nullToEmpty;
import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.trino.hive.formats.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE;
+import static io.trino.hive.formats.rcfile.text.TextRcFileEncoding.getDefaultSeparators;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA;
@@ -82,10 +82,7 @@
import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST;
import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB;
import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_NULL_FORMAT;
-import static io.trino.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE;
-import static io.trino.rcfile.text.TextRcFileEncoding.getDefaultSeparators;
import static java.lang.Math.min;
-import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS;
@@ -161,24 +158,22 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {
.collect(toImmutableList());
}
- RcFileDataSource dataSource;
+ TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment).create(session.getIdentity());
+ TrinoInputFile inputFile = new MonitoredTrinoInputFile(stats, trinoFileSystem.newInputFile(path.toString()));
try {
- FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
- FSDataInputStream inputStream = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(path));
+ length = min(inputFile.length() - start, length);
+ if (!inputFile.exists()) {
+ throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, "File does not exist");
+ }
if (estimatedFileSize < BUFFER_SIZE.toBytes()) {
- // Handle potentially imprecise file lengths by reading the footer
- try {
- FSDataInputStreamTail fileTail = FSDataInputStreamTail.readTail(path.toString(), estimatedFileSize, inputStream, toIntExact(BUFFER_SIZE.toBytes()));
- dataSource = new MemoryRcFileDataSource(new RcFileDataSourceId(path.toString()), fileTail.getTailSlice());
- }
- finally {
- inputStream.close();
+ try (TrinoInput input = inputFile.newInput(); InputStream inputStream = input.inputStream()) {
+ byte[] data = inputStream.readAllBytes();
+ inputFile = new MemoryInputFile(path.toString(), Slices.wrappedBuffer(data));
}
}
- else {
- long fileSize = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(path).getLen());
- dataSource = new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats);
- }
+ }
+ catch (TrinoException e) {
+ throw e;
}
catch (Exception e) {
if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") ||
@@ -188,7 +183,6 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
}
- length = min(dataSource.getSize() - start, length);
// Split may be empty now that the correct file size is known
if (length <= 0) {
return Optional.of(noProjectionAdaptation(new EmptyPageSource()));
@@ -202,23 +196,16 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {
}
RcFileReader rcFileReader = new RcFileReader(
- dataSource,
+ inputFile,
rcFileEncoding,
readColumns.buildOrThrow(),
- new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
start,
- length,
- BUFFER_SIZE);
+ length);
ConnectorPageSource pageSource = new RcFilePageSource(rcFileReader, projectedReaderColumns);
return Optional.of(new ReaderPageSource(pageSource, readerProjections));
}
catch (Throwable e) {
- try {
- dataSource.close();
- }
- catch (IOException ignored) {
- }
if (e instanceof TrinoException) {
throw (TrinoException) e;
}
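
Two things changed in the open path above. First, the explicit dataSource.close() in the catch block is gone because the reader now owns the input file. Second, the FSDataInputStreamTail handling for potentially imprecise file lengths is replaced by a simpler rule: a file smaller than the read buffer is copied wholesale into a MemoryInputFile, so all later reads are served from memory. The same pattern in isolation, as a sketch (the helper is hypothetical and checks the real length rather than the estimated size the factory uses):

    import io.airlift.slice.Slices;
    import io.trino.filesystem.TrinoInput;
    import io.trino.filesystem.TrinoInputFile;
    import io.trino.filesystem.memory.MemoryInputFile;

    import java.io.IOException;
    import java.io.InputStream;

    final class SmallFileBuffering
    {
        private SmallFileBuffering() {}

        static TrinoInputFile bufferIfSmall(TrinoInputFile inputFile, String location, long bufferSizeBytes)
                throws IOException
        {
            if (inputFile.length() >= bufferSizeBytes) {
                return inputFile; // large file: keep reading through the file system
            }
            try (TrinoInput input = inputFile.newInput(); InputStream inputStream = input.inputStream()) {
                byte[] data = inputStream.readAllBytes();
                // Small file: serve every subsequent read from this in-memory copy
                return new MemoryInputFile(location, Slices.wrappedBuffer(data));
            }
        }
    }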
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java
index b7ced2c11f5f..525edff6c559 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java
@@ -41,9 +41,9 @@ public static void configureCompression(Configuration config, HiveCompressionCod
OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());
// For RCFile and Text
- if (compressionCodec.getCodec().isPresent()) {
- config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
- config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
+ if (compressionCodec.getHiveCompressionKind().isPresent()) {
+ config.set("mapred.output.compression.codec", compressionCodec.getHiveCompressionKind().get().getHadoopClassName());
+ config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getHiveCompressionKind().get().getHadoopClassName());
}
else {
config.unset("mapred.output.compression.codec");
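
The only thing that travels through the Hadoop Configuration here is the codec class name; RcFileFileWriterFactory (earlier in this patch) turns it back into a CompressionKind with CompressionKind.fromHadoopClassName. A sketch of that round trip (the helper is illustrative):

    import io.trino.hive.formats.compression.CompressionKind;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.util.Optional;

    final class CodecRoundTrip
    {
        private CodecRoundTrip() {}

        static Optional<CompressionKind> roundTrip(Configuration config, CompressionKind kind)
        {
            // Store the Hadoop class name, exactly as configureCompression does
            config.set(FileOutputFormat.COMPRESS_CODEC, kind.getHadoopClassName());
            // Recover the kind on the writer side, as RcFileFileWriterFactory does
            return Optional.ofNullable(config.get(FileOutputFormat.COMPRESS_CODEC))
                    .map(CompressionKind::fromHadoopClassName);
        }
    }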
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
index 7ac8b58e1eae..4b84368c4f6e 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java
@@ -203,7 +203,7 @@ public static Set<HiveRecordCursorProvider> getDefaultHiveRecordCursorProviders(
public static Set<HiveFileWriterFactory> getDefaultHiveFileWriterFactories(HiveConfig hiveConfig, HdfsEnvironment hdfsEnvironment)
{
return ImmutableSet.<HiveFileWriterFactory>builder()
- .add(new RcFileFileWriterFactory(hdfsEnvironment, TESTING_TYPE_MANAGER, new NodeVersion("test_version"), hiveConfig, new FileFormatDataSourceStats()))
+ .add(new RcFileFileWriterFactory(hdfsEnvironment, TESTING_TYPE_MANAGER, new NodeVersion("test_version"), hiveConfig))
.add(getDefaultOrcFileWriterFactory(hdfsEnvironment))
.build();
}
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
index b12ab6308eca..95035c2660df 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
@@ -20,6 +20,7 @@
import io.airlift.compress.lzo.LzopCodec;
import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
+import io.trino.hive.formats.compression.CompressionKind;
import io.trino.orc.OrcReaderOptions;
import io.trino.orc.OrcWriterOptions;
import io.trino.plugin.hive.orc.OrcFileWriterFactory;
@@ -248,7 +249,7 @@ public void testRcTextOptimizedWriter(int rowCount)
assertThatFileFormat(RCTEXT)
.withColumns(testColumns)
.withRowsCount(rowCount)
- .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
+ .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));
}
@@ -289,7 +290,7 @@ public void testRcBinaryOptimizedWriter(int rowCount)
assertThatFileFormat(RCBINARY)
.withColumns(testColumns)
.withRowsCount(rowCount)
- .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
+ .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()))
.withColumns(testColumnsNoTimestamps)
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
@@ -773,7 +774,7 @@ public void testRCBinaryProjectedColumns(int rowCount)
.withWriteColumns(writeColumns)
.withReadColumns(readColumns)
.withRowsCount(rowCount)
- .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
+ .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));
}
@@ -801,7 +802,7 @@ public void testRCBinaryProjectedColumnsPageSource(int rowCount)
.withWriteColumns(writeColumns)
.withReadColumns(readColumns)
.withRowsCount(rowCount)
- .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
+ .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));
}
@@ -1270,15 +1271,8 @@ private void assertRead(Optional<HivePageSourceFactory> pageSourceFactory, Optio
assertNotNull(session, "session must be specified");
assertTrue(rowsCount >= 0, "rowsCount must be non-negative");
- String compressionSuffix = compressionCodec.getCodec()
- .map(codec -> {
- try {
- return codec.getConstructor().newInstance().getDefaultExtension();
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- })
+ String compressionSuffix = compressionCodec.getHiveCompressionKind()
+ .map(CompressionKind::getFileExtension)
.orElse("");
File file = File.createTempFile("trino_test", formatName + compressionSuffix);
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
index b30ea5c6053f..25dab4c05fb2 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java
@@ -14,9 +14,12 @@
package io.trino.plugin.hive.benchmark;
import com.google.common.collect.ImmutableMap;
-import io.airlift.slice.OutputStreamSliceOutput;
import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
import io.trino.hdfs.HdfsEnvironment;
+import io.trino.hive.formats.rcfile.RcFileEncoding;
+import io.trino.hive.formats.rcfile.RcFileWriter;
+import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding;
+import io.trino.hive.formats.rcfile.text.TextRcFileEncoding;
import io.trino.orc.OrcReaderOptions;
import io.trino.orc.OrcWriter;
import io.trino.orc.OrcWriterOptions;
@@ -37,12 +40,6 @@
import io.trino.plugin.hive.parquet.ParquetPageSourceFactory;
import io.trino.plugin.hive.parquet.ParquetReaderConfig;
import io.trino.plugin.hive.rcfile.RcFilePageSourceFactory;
-import io.trino.rcfile.AircompressorCodecFactory;
-import io.trino.rcfile.HadoopCodecFactory;
-import io.trino.rcfile.RcFileEncoding;
-import io.trino.rcfile.RcFileWriter;
-import io.trino.rcfile.binary.BinaryRcFileEncoding;
-import io.trino.rcfile.text.TextRcFileEncoding;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.type.Type;
@@ -386,11 +383,10 @@ public PrestoRcFileFormatWriter(File targetFile, List<Type> types, RcFileEncodin
throws IOException
{
writer = new RcFileWriter(
- new OutputStreamSliceOutput(new FileOutputStream(targetFile)),
+ new FileOutputStream(targetFile),
types,
encoding,
- compressionCodec.getCodec().map(Class::getName),
- new AircompressorCodecFactory(new HadoopCodecFactory(getClass().getClassLoader())),
+ compressionCodec.getHiveCompressionKind(),
ImmutableMap.of(),
true);
}
diff --git a/pom.xml b/pom.xml
index 8acafb4b0d44..1ae8edee5ea0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -118,6 +118,7 @@
<module>lib/trino-geospatial-toolkit</module>
<module>lib/trino-hadoop-toolkit</module>
<module>lib/trino-hdfs</module>
+ <module>lib/trino-hive-formats</module>
<module>lib/trino-matching</module>
<module>lib/trino-memory-context</module>
<module>lib/trino-orc</module>
@@ -125,7 +126,6 @@
<module>lib/trino-phoenix5-patched</module>
<module>lib/trino-plugin-toolkit</module>
- <module>lib/trino-rcfile</module>
<module>lib/trino-record-decoder</module>
<module>plugin/trino-accumulo</module>
<module>plugin/trino-accumulo-iterators</module>
@@ -357,6 +357,12 @@
<version>${project.version}</version>
</dependency>

+ <dependency>
+ <groupId>io.trino</groupId>
+ <artifactId>trino-hive-formats</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hive-hadoop2</artifactId>
@@ -558,12 +564,6 @@
<version>${project.version}</version>
</dependency>

- <dependency>
- <groupId>io.trino</groupId>
- <artifactId>trino-rcfile</artifactId>
- <version>${project.version}</version>
- </dependency>
-
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-record-decoder</artifactId>