From c22e54e215246421aefd9ad899e5546432e6a4ef Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sat, 27 Aug 2022 16:27:59 -0700 Subject: [PATCH 01/10] Rename trino-rcfile to trino-hive-formats --- .github/config/labeler-config.yml | 2 +- lib/{trino-rcfile => trino-hive-formats}/pom.xml | 6 +++--- .../io/trino/rcfile/AircompressorCodecFactory.java | 0 .../io/trino/rcfile/AircompressorCompressor.java | 0 .../io/trino/rcfile/AircompressorDecompressor.java | 0 .../rcfile/BufferedOutputStreamSliceOutput.java | 0 .../java/io/trino/rcfile/ChunkedSliceOutput.java | 0 .../src/main/java/io/trino/rcfile/ColumnData.java | 0 .../main/java/io/trino/rcfile/ColumnEncoding.java | 0 .../main/java/io/trino/rcfile/EncodeOutput.java | 0 .../java/io/trino/rcfile/FileRcFileDataSource.java | 0 .../java/io/trino/rcfile/HadoopCodecFactory.java | 0 .../java/io/trino/rcfile/HadoopCompressor.java | 0 .../java/io/trino/rcfile/HadoopDecompressor.java | 0 .../io/trino/rcfile/MemoryRcFileDataSource.java | 0 .../main/java/io/trino/rcfile/NoneCompressor.java | 0 .../java/io/trino/rcfile/PageSplitterUtil.java | 0 .../java/io/trino/rcfile/RcFileCodecFactory.java | 0 .../java/io/trino/rcfile/RcFileCompressor.java | 0 .../io/trino/rcfile/RcFileCorruptionException.java | 0 .../java/io/trino/rcfile/RcFileDataSource.java | 0 .../java/io/trino/rcfile/RcFileDataSourceId.java | 0 .../java/io/trino/rcfile/RcFileDecoderUtils.java | 0 .../java/io/trino/rcfile/RcFileDecompressor.java | 0 .../main/java/io/trino/rcfile/RcFileEncoding.java | 0 .../main/java/io/trino/rcfile/RcFileReader.java | 0 .../io/trino/rcfile/RcFileWriteValidation.java | 0 .../main/java/io/trino/rcfile/RcFileWriter.java | 0 .../main/java/io/trino/rcfile/TimestampHolder.java | 0 .../main/java/io/trino/rcfile/ValidationHash.java | 0 .../trino/rcfile/binary/BinaryColumnEncoding.java | 0 .../io/trino/rcfile/binary/BinaryEncoding.java | 0 .../trino/rcfile/binary/BinaryRcFileEncoding.java | 0 .../java/io/trino/rcfile/binary/BlockEncoding.java | 0 
.../io/trino/rcfile/binary/BooleanEncoding.java | 0 .../java/io/trino/rcfile/binary/ByteEncoding.java | 0 .../java/io/trino/rcfile/binary/DateEncoding.java | 0 .../io/trino/rcfile/binary/DecimalEncoding.java | 0 .../io/trino/rcfile/binary/DoubleEncoding.java | 0 .../java/io/trino/rcfile/binary/FloatEncoding.java | 0 .../java/io/trino/rcfile/binary/ListEncoding.java | 0 .../java/io/trino/rcfile/binary/LongEncoding.java | 0 .../java/io/trino/rcfile/binary/MapEncoding.java | 0 .../java/io/trino/rcfile/binary/ShortEncoding.java | 0 .../io/trino/rcfile/binary/StringEncoding.java | 0 .../io/trino/rcfile/binary/StructEncoding.java | 0 .../io/trino/rcfile/binary/TimestampEncoding.java | 0 .../java/io/trino/rcfile/text/BinaryEncoding.java | 0 .../java/io/trino/rcfile/text/BlockEncoding.java | 0 .../java/io/trino/rcfile/text/BooleanEncoding.java | 0 .../java/io/trino/rcfile/text/DateEncoding.java | 0 .../java/io/trino/rcfile/text/DecimalEncoding.java | 0 .../java/io/trino/rcfile/text/DoubleEncoding.java | 0 .../java/io/trino/rcfile/text/FloatEncoding.java | 0 .../java/io/trino/rcfile/text/ListEncoding.java | 0 .../java/io/trino/rcfile/text/LongEncoding.java | 0 .../java/io/trino/rcfile/text/MapEncoding.java | 0 .../java/io/trino/rcfile/text/StringEncoding.java | 0 .../java/io/trino/rcfile/text/StructEncoding.java | 0 .../io/trino/rcfile/text/TextColumnEncoding.java | 0 .../io/trino/rcfile/text/TextRcFileEncoding.java | 0 .../io/trino/rcfile/text/TimestampEncoding.java | 0 .../io/trino/rcfile/AbstractTestRcFileReader.java | 0 .../test/java/io/trino/rcfile/RcFileTester.java | 0 .../TestBufferedOutputStreamSliceOutput.java | 0 .../java/io/trino/rcfile/TestFullRcFileReader.java | 0 .../io/trino/rcfile/TestRcFileDecoderUtils.java | 0 .../java/io/trino/rcfile/TestRcFileReader.java | 0 .../io/trino/rcfile/TestRcFileReaderManual.java | 0 plugin/trino-hive/pom.xml | 10 +++++----- pom.xml | 14 +++++++------- 71 files changed, 16 insertions(+), 16 deletions(-) rename lib/{trino-rcfile 
=> trino-hive-formats}/pom.xml (96%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/AircompressorCompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/AircompressorDecompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/ColumnData.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/ColumnEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/EncodeOutput.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/FileRcFileDataSource.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/HadoopCodecFactory.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/HadoopCompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/HadoopDecompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/NoneCompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/PageSplitterUtil.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileCodecFactory.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileCompressor.java (100%) rename lib/{trino-rcfile => 
trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileCorruptionException.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileDataSource.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileDataSourceId.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileDecompressor.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileReader.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileWriteValidation.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/RcFileWriter.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/TimestampHolder.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/ValidationHash.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/BlockEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/ByteEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/DateEncoding.java (100%) rename lib/{trino-rcfile => 
trino-hive-formats}/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/FloatEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/ListEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/LongEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/MapEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/ShortEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/StringEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/StructEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/BinaryEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/BlockEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/BooleanEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/DateEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/DecimalEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/DoubleEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/FloatEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/ListEncoding.java (100%) rename lib/{trino-rcfile => 
trino-hive-formats}/src/main/java/io/trino/rcfile/text/LongEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/MapEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/StringEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/StructEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/main/java/io/trino/rcfile/text/TimestampEncoding.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/RcFileTester.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/TestFullRcFileReader.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/TestRcFileReader.java (100%) rename lib/{trino-rcfile => trino-hive-formats}/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java (100%) diff --git a/.github/config/labeler-config.yml b/.github/config/labeler-config.yml index f968947a02e2..00a1b2948657 100644 --- a/.github/config/labeler-config.yml +++ b/.github/config/labeler-config.yml @@ -2,7 +2,7 @@ "tests:hive": - lib/trino-orc/** - lib/trino-parquet/** - - lib/trino-rcfile/** + - lib/trino-hive-formats/** - plugin/trino-hive-hadoop2/** - plugin/trino-hive/** - testing/trino-product-tests/** diff --git a/lib/trino-rcfile/pom.xml 
b/lib/trino-hive-formats/pom.xml similarity index 96% rename from lib/trino-rcfile/pom.xml rename to lib/trino-hive-formats/pom.xml index 377a4a845c16..60d21a8bb901 100644 --- a/lib/trino-rcfile/pom.xml +++ b/lib/trino-hive-formats/pom.xml @@ -9,9 +9,9 @@ ../../pom.xml - trino-rcfile - trino-rcfile - Trino - RCFile + trino-hive-formats + trino-hive-formats + Trino - Hive Formats ${project.parent.basedir} diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorDecompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/AircompressorDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorDecompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java diff --git 
a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnData.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnData.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnData.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnData.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/EncodeOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/EncodeOutput.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/EncodeOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/EncodeOutput.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/FileRcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/FileRcFileDataSource.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/FileRcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/FileRcFileDataSource.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCodecFactory.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCodecFactory.java rename to 
lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCodecFactory.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopDecompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/HadoopDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopDecompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/NoneCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/NoneCompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/NoneCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/NoneCompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/PageSplitterUtil.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/PageSplitterUtil.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/PageSplitterUtil.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/PageSplitterUtil.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCodecFactory.java 
similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCodecFactory.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCorruptionException.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCorruptionException.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileCorruptionException.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCorruptionException.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSource.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSource.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSourceId.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSourceId.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDataSourceId.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSourceId.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java diff --git 
a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecompressor.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecompressor.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileReader.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileReader.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileReader.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriteValidation.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriteValidation.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriteValidation.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriteValidation.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriter.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/RcFileWriter.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriter.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/TimestampHolder.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/TimestampHolder.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/TimestampHolder.java rename to 
lib/trino-hive-formats/src/main/java/io/trino/rcfile/TimestampHolder.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/ValidationHash.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ValidationHash.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/ValidationHash.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/ValidationHash.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BlockEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BlockEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BlockEncoding.java diff --git 
a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ByteEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ByteEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ByteEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ByteEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DateEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DateEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DateEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/FloatEncoding.java similarity index 
100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/FloatEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/FloatEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ListEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ListEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ListEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/LongEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/LongEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/LongEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/MapEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/MapEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/MapEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ShortEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ShortEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/ShortEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ShortEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StringEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StringEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StringEncoding.java diff --git 
a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StructEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/StructEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StructEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BinaryEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BinaryEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BinaryEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BlockEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BlockEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BlockEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BooleanEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/BooleanEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BooleanEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DateEncoding.java similarity index 100% 
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DateEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DateEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DecimalEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DecimalEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DecimalEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DoubleEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/DoubleEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DoubleEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/FloatEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/FloatEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/FloatEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/ListEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/ListEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/ListEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/LongEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/LongEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/LongEncoding.java diff --git 
a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/MapEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/MapEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/MapEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StringEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StringEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StringEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StructEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/StructEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StructEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java similarity index 100% rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java diff --git a/lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TimestampEncoding.java similarity index 100% 
rename from lib/trino-rcfile/src/main/java/io/trino/rcfile/text/TimestampEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TimestampEncoding.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/RcFileTester.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/RcFileTester.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/RcFileTester.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestFullRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestFullRcFileReader.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestFullRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestFullRcFileReader.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java rename to 
lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReader.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReader.java diff --git a/lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java similarity index 100% rename from lib/trino-rcfile/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java rename to lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml index 0912ff40b426..c51de92a12f8 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -49,27 +49,27 @@ io.trino - trino-memory-context + trino-hive-formats io.trino - trino-orc + trino-memory-context io.trino - trino-parquet + trino-orc io.trino - trino-plugin-toolkit + trino-parquet io.trino - trino-rcfile + trino-plugin-toolkit diff --git a/pom.xml b/pom.xml index 8acafb4b0d44..1ae8edee5ea0 100644 --- a/pom.xml +++ b/pom.xml @@ -118,6 +118,7 @@ lib/trino-geospatial-toolkit lib/trino-hadoop-toolkit lib/trino-hdfs + lib/trino-hive-formats lib/trino-matching lib/trino-memory-context lib/trino-orc @@ -125,7 +126,6 @@ lib/trino-phoenix5-patched lib/trino-plugin-toolkit - lib/trino-rcfile lib/trino-record-decoder plugin/trino-accumulo plugin/trino-accumulo-iterators @@ -357,6 +357,12 @@ ${project.version} + + io.trino + trino-hive-formats + ${project.version} + + io.trino trino-hive-hadoop2 @@ -558,12 +564,6 @@ ${project.version} - - io.trino - trino-rcfile - ${project.version} - - io.trino trino-record-decoder From 81d7ad11536474ce363dd8c287cdbb9af81aad58 Mon Sep 17 00:00:00 2001 From: Dain 
Sundstrom Date: Sat, 27 Aug 2022 16:53:17 -0700 Subject: [PATCH 02/10] Move RCFile to io.trino.hive.formats package --- .../rcfile/AircompressorCodecFactory.java | 2 +- .../rcfile/AircompressorCompressor.java | 2 +- .../rcfile/AircompressorDecompressor.java | 2 +- .../BufferedOutputStreamSliceOutput.java | 2 +- .../formats}/rcfile/ChunkedSliceOutput.java | 2 +- .../{ => hive/formats}/rcfile/ColumnData.java | 2 +- .../formats}/rcfile/ColumnEncoding.java | 2 +- .../formats}/rcfile/EncodeOutput.java | 2 +- .../formats}/rcfile/FileRcFileDataSource.java | 2 +- .../formats}/rcfile/HadoopCodecFactory.java | 2 +- .../formats}/rcfile/HadoopCompressor.java | 2 +- .../formats}/rcfile/HadoopDecompressor.java | 2 +- .../rcfile/MemoryRcFileDataSource.java | 2 +- .../formats}/rcfile/NoneCompressor.java | 2 +- .../formats}/rcfile/PageSplitterUtil.java | 2 +- .../formats}/rcfile/RcFileCodecFactory.java | 2 +- .../formats}/rcfile/RcFileCompressor.java | 2 +- .../rcfile/RcFileCorruptionException.java | 2 +- .../formats}/rcfile/RcFileDataSource.java | 2 +- .../formats}/rcfile/RcFileDataSourceId.java | 2 +- .../formats}/rcfile/RcFileDecoderUtils.java | 2 +- .../formats}/rcfile/RcFileDecompressor.java | 2 +- .../formats}/rcfile/RcFileEncoding.java | 2 +- .../formats}/rcfile/RcFileReader.java | 25 +++++++-------- .../rcfile/RcFileWriteValidation.java | 2 +- .../formats}/rcfile/RcFileWriter.java | 15 +++++---- .../formats}/rcfile/TimestampHolder.java | 2 +- .../formats}/rcfile/ValidationHash.java | 2 +- .../rcfile/binary/BinaryColumnEncoding.java | 4 +-- .../rcfile/binary/BinaryEncoding.java | 12 +++---- .../rcfile/binary/BinaryRcFileEncoding.java | 6 ++-- .../formats}/rcfile/binary/BlockEncoding.java | 6 ++-- .../rcfile/binary/BooleanEncoding.java | 6 ++-- .../formats}/rcfile/binary/ByteEncoding.java | 6 ++-- .../formats}/rcfile/binary/DateEncoding.java | 12 +++---- .../rcfile/binary/DecimalEncoding.java | 12 +++---- .../rcfile/binary/DoubleEncoding.java | 6 ++-- 
.../formats}/rcfile/binary/FloatEncoding.java | 6 ++-- .../formats}/rcfile/binary/ListEncoding.java | 12 +++---- .../formats}/rcfile/binary/LongEncoding.java | 21 ++++++------- .../formats}/rcfile/binary/MapEncoding.java | 12 +++---- .../formats}/rcfile/binary/ShortEncoding.java | 6 ++-- .../rcfile/binary/StringEncoding.java | 14 ++++----- .../rcfile/binary/StructEncoding.java | 2 +- .../rcfile/binary/TimestampEncoding.java | 31 +++++++++---------- .../formats}/rcfile/text/BinaryEncoding.java | 6 ++-- .../formats}/rcfile/text/BlockEncoding.java | 8 ++--- .../formats}/rcfile/text/BooleanEncoding.java | 6 ++-- .../formats}/rcfile/text/DateEncoding.java | 6 ++-- .../formats}/rcfile/text/DecimalEncoding.java | 6 ++-- .../formats}/rcfile/text/DoubleEncoding.java | 8 ++--- .../formats}/rcfile/text/FloatEncoding.java | 8 ++--- .../formats}/rcfile/text/ListEncoding.java | 4 +-- .../formats}/rcfile/text/LongEncoding.java | 6 ++-- .../formats}/rcfile/text/MapEncoding.java | 4 +-- .../formats}/rcfile/text/StringEncoding.java | 8 ++--- .../formats}/rcfile/text/StructEncoding.java | 4 +-- .../rcfile/text/TextColumnEncoding.java | 6 ++-- .../rcfile/text/TextRcFileEncoding.java | 6 ++-- .../rcfile/text/TimestampEncoding.java | 8 ++--- .../rcfile/AbstractTestRcFileReader.java | 4 +-- .../formats}/rcfile/RcFileTester.java | 22 ++++++------- .../TestBufferedOutputStreamSliceOutput.java | 2 +- .../formats}/rcfile/TestFullRcFileReader.java | 2 +- .../rcfile/TestRcFileDecoderUtils.java | 2 +- .../formats}/rcfile/TestRcFileReader.java | 2 +- .../rcfile/TestRcFileReaderManual.java | 4 +-- .../trino/plugin/hive/RcFileFileWriter.java | 10 +++--- .../plugin/hive/RcFileFileWriterFactory.java | 6 ++-- .../hive/rcfile/HdfsRcFileDataSource.java | 4 +-- .../plugin/hive/rcfile/RcFilePageSource.java | 4 +-- .../hive/rcfile/RcFilePageSourceFactory.java | 24 +++++++------- .../hive/benchmark/StandardFileFormats.java | 12 +++---- 73 files changed, 222 insertions(+), 236 deletions(-) rename 
lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/AircompressorCodecFactory.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/AircompressorCompressor.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/AircompressorDecompressor.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/BufferedOutputStreamSliceOutput.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/ChunkedSliceOutput.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/ColumnData.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/ColumnEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/EncodeOutput.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/FileRcFileDataSource.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/HadoopCodecFactory.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/HadoopCompressor.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/HadoopDecompressor.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/MemoryRcFileDataSource.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/NoneCompressor.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/PageSplitterUtil.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileCodecFactory.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileCompressor.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileCorruptionException.java (96%) rename 
lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileDataSource.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileDataSourceId.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileDecoderUtils.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileDecompressor.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileEncoding.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileReader.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileWriteValidation.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/RcFileWriter.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/TimestampHolder.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/ValidationHash.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/BinaryColumnEncoding.java (91%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/BinaryEncoding.java (89%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/BinaryRcFileEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/BlockEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/BooleanEncoding.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/ByteEncoding.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/DateEncoding.java (87%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/DecimalEncoding.java (94%) rename 
lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/DoubleEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/FloatEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/ListEncoding.java (88%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/LongEncoding.java (78%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/MapEncoding.java (92%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/ShortEncoding.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/StringEncoding.java (88%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/StructEncoding.java (98%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/binary/TimestampEncoding.java (88%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/BinaryEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/BlockEncoding.java (93%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/BooleanEncoding.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/DateEncoding.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/DecimalEncoding.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/DoubleEncoding.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/FloatEncoding.java (94%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/ListEncoding.java (96%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/LongEncoding.java (96%) rename 
lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/MapEncoding.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/StringEncoding.java (95%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/StructEncoding.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/TextColumnEncoding.java (86%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/TextRcFileEncoding.java (97%) rename lib/trino-hive-formats/src/main/java/io/trino/{ => hive/formats}/rcfile/text/TimestampEncoding.java (96%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/AbstractTestRcFileReader.java (98%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/RcFileTester.java (98%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/TestBufferedOutputStreamSliceOutput.java (98%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/TestFullRcFileReader.java (95%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/TestRcFileDecoderUtils.java (98%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/TestRcFileReader.java (95%) rename lib/trino-hive-formats/src/test/java/io/trino/{ => hive/formats}/rcfile/TestRcFileReaderManual.java (99%) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java index 869be0c63973..b33bd764a9e5 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCodecFactory.java 
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.compress.gzip.JdkGzipCodec; import io.airlift.compress.lz4.Lz4Codec; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java index 849fae82baf4..39b7d62a7941 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionOutputStream; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java index b7759c43a6ea..2bce5ed4ab93 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/AircompressorDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import org.apache.hadoop.io.compress.CompressionCodec; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java index 45df863814f9..7e96f0ec751e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/BufferedOutputStreamSliceOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java index ccebbb3070e2..bcc71947a0cb 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ChunkedSliceOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnData.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnData.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java index 123377ed9c2b..1abec9ddafd3 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnData.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnData.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java index a12846cea8d7..657b76ec9151 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ColumnEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ColumnEncoding.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.SliceOutput; import io.trino.spi.block.Block; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/EncodeOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/EncodeOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java index a90036988511..240a84b98247 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/EncodeOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/EncodeOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; public interface EncodeOutput { diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/FileRcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/FileRcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java index 21d3188188a9..7ed9e7bb5bd8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/FileRcFileDataSource.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import java.io.File; import java.io.IOException; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java index f33c91ef4a1a..d78b02e07948 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.io.compress.CompressionCodec; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java index 9c4ff29b9fda..57a8d8d17f23 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java index 99de02061158..aa50954f7c48 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/HadoopDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import org.apache.hadoop.io.compress.CodecPool; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java index 9e2710f9bb16..09023eb1c9e8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/MemoryRcFileDataSource.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/NoneCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/NoneCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java index c93a2d1e5ee1..beac358dd15c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/NoneCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import java.util.function.Supplier; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/PageSplitterUtil.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/PageSplitterUtil.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java index ec6cab630d33..db4419935c3f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/PageSplitterUtil.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/PageSplitterUtil.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ImmutableList; import io.trino.spi.Page; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java index 059d4403e7c7..9ae77789efa8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; public interface RcFileCodecFactory { diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java index f4fb9cc6d1bc..632a57cd1df9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCorruptionException.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCorruptionException.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java index c21512163e23..7720660b985c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileCorruptionException.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCorruptionException.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import java.io.IOException; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSource.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java index 6da2c05d63b7..87a5702748a3 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSource.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import java.io.Closeable; import java.io.IOException; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSourceId.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSourceId.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java index b15257685184..6ce41fe852fe 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDataSourceId.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import java.util.Objects; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java index ca496ad6d563..8366aa2a7f33 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecoderUtils.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import io.airlift.slice.SliceInput; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java index 3391524c6958..980110579b18 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java index 53f3382fffef..4f3ef608907e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileEncoding.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.trino.spi.TrinoException; import io.trino.spi.type.ArrayType; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileReader.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 7ab8849fb06b..01f1464bd75f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ImmutableMap; import io.airlift.slice.BasicSliceInput; @@ -23,8 +23,8 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.DataSize.Unit; -import io.trino.rcfile.RcFileWriteValidation.WriteChecksum; -import io.trino.rcfile.RcFileWriteValidation.WriteChecksumBuilder; +import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum; +import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksumBuilder; import io.trino.spi.Page; import io.trino.spi.block.Block; import io.trino.spi.block.RunLengthEncodedBlock; @@ -44,9 +44,6 @@ import static com.google.common.io.ByteStreams.skipFully; import static io.airlift.slice.SizeOf.SIZE_OF_INT; import static io.airlift.slice.SizeOf.SIZE_OF_LONG; -import static io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder; import static java.lang.Math.min; import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; @@ -134,7 +131,7 @@ private RcFileReader( this.input = new ChunkedSliceInput(new DataSourceSliceLoader(dataSource), toIntExact(bufferSize.toBytes())); this.writeValidation = requireNonNull(writeValidation, "writeValidation is null"); - this.writeChecksumBuilder = writeValidation.map(validation -> createWriteChecksumBuilder(readColumns)); + this.writeChecksumBuilder = writeValidation.map(validation -> WriteChecksumBuilder.createWriteChecksumBuilder(readColumns)); verify(offset >= 0, "offset is negative"); verify(offset < dataSource.getSize(), "offset is greater than data size"); @@ -383,7 +380,7 @@ else if (rowsRead > 0) { BasicSliceInput headerInput = header.getInput(); // read number of rows in row group - rowGroupRowCount = toIntExact(readVInt(headerInput)); + rowGroupRowCount = 
toIntExact(RcFileDecoderUtils.readVInt(headerInput)); rowsRead += rowGroupRowCount; rowGroupPosition = 0; currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount); @@ -391,14 +388,14 @@ else if (rowsRead > 0) { // set column buffers int totalCompressedDataSize = 0; for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) { - int compressedDataSize = toIntExact(readVInt(headerInput)); + int compressedDataSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); totalCompressedDataSize += compressedDataSize; - int uncompressedDataSize = toIntExact(readVInt(headerInput)); + int uncompressedDataSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); if (decompressor == null && compressedDataSize != uncompressedDataSize) { throw corrupt("Invalid RCFile %s", dataSource.getId()); } - int lengthsSize = toIntExact(readVInt(headerInput)); + int lengthsSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); Slice lengthsBuffer = headerInput.readSlice(lengthsSize); @@ -442,7 +439,7 @@ public RcFileDataSourceId getId() private void seekToFirstRowGroupInRange(long offset, long length) throws IOException { - long startOfSyncSequence = findFirstSyncPosition(dataSource, offset, length, syncFirst, syncSecond); + long startOfSyncSequence = RcFileDecoderUtils.findFirstSyncPosition(dataSource, offset, length, syncFirst, syncSecond); if (startOfSyncSequence < 0) { closeQuietly(); return; @@ -462,7 +459,7 @@ private void closeQuietly() private Slice readLengthPrefixedString(SliceInput in) throws RcFileCorruptionException { - int length = toIntExact(readVInt(in)); + int length = toIntExact(RcFileDecoderUtils.readVInt(in)); verify(length <= MAX_METADATA_STRING_LENGTH, "Metadata string value is too long (%s) in RCFile %s", length, in); return in.readSlice(length); } @@ -630,7 +627,7 @@ private int readNextValueLength() return lastValueLength; } - int valueLength = toIntExact(readVInt(lengthsInput)); + int valueLength = 
toIntExact(RcFileDecoderUtils.readVInt(lengthsInput)); // negative length is used to encode a run or the last value if (valueLength < 0) { diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriteValidation.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriteValidation.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java index 21a3ab3103b1..2deed4a6fbc7 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriteValidation.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriter.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index 572e483add83..560e7f4e2202 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -11,15 +11,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.io.Closer; import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; -import io.trino.rcfile.RcFileCompressor.CompressedSliceOutput; -import io.trino.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder; +import io.trino.hive.formats.rcfile.RcFileCompressor.CompressedSliceOutput; +import io.trino.hive.formats.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder; import io.trino.spi.Page; import io.trino.spi.block.Block; import io.trino.spi.type.Type; @@ -41,10 +41,9 @@ import static io.airlift.slice.Slices.utf8Slice; import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.airlift.units.DataSize.Unit.MEGABYTE; -import static io.trino.rcfile.PageSplitterUtil.splitPage; -import static io.trino.rcfile.RcFileDecoderUtils.writeLengthPrefixedString; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; -import static io.trino.rcfile.RcFileReader.validateFile; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeLengthPrefixedString; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.rcfile.RcFileReader.validateFile; import static java.lang.StrictMath.toIntExact; import static java.util.Objects.requireNonNull; @@ -239,7 +238,7 @@ public void write(Page page) if (page.getPositionCount() == 0) { return; } - List pages = splitPage(page, targetMaxRowGroupSize); + List pages = PageSplitterUtil.splitPage(page, targetMaxRowGroupSize); for (Page splitPage : pages) { bufferPage(splitPage); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/TimestampHolder.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/TimestampHolder.java rename to 
lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java index c770ec18a18e..9515602364d5 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/TimestampHolder.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.trino.spi.block.Block; import io.trino.spi.type.LongTimestamp; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ValidationHash.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/ValidationHash.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java index 2f90bf94dfbd..1ead4db8192d 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/ValidationHash.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.trino.spi.block.Block; import io.trino.spi.function.InvocationConvention; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java similarity index 91% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java index 2ef66e228c70..c00fc5bfce3c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryColumnEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryColumnEncoding.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnEncoding; +import io.trino.hive.formats.rcfile.ColumnEncoding; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java similarity index 89% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java index 1e4033f5a258..e7829a69162f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java @@ -11,19 +11,19 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; import static java.lang.Math.toIntExact; public class BinaryEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java index ac085212816f..1f77eaa1cb11 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BinaryRcFileEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java @@ -11,10 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; -import io.trino.rcfile.ColumnEncoding; -import io.trino.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.ColumnEncoding; +import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; import org.joda.time.DateTimeZone; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BlockEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java index 0327dfc39852..4f5e329a148c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BlockEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BlockEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java index fa1b0e3fb9df..04f1e2a6bec9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/BooleanEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BooleanEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ByteEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ByteEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java index 4e78a098d6cf..d41dc44c58ee 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ByteEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ByteEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java similarity index 87% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DateEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java index 78f0528239e1..37adf8f72d29 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DateEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java @@ -11,19 +11,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; import static java.lang.Math.toIntExact; public class DateEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java index 4df1218e111a..2e0671cb48ab 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DecimalEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.DecimalType; @@ -28,9 +28,9 @@ import java.math.BigInteger; import static com.google.common.base.Preconditions.checkState; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; import static io.trino.spi.type.Decimals.rescale; import static java.lang.Math.toIntExact; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java index 338b67b3939b..c0acf5f96343 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/DoubleEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DoubleEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/FloatEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java index b3ff029ed84f..d6dd88f6474c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/FloatEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/FloatEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java similarity index 88% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ListEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java index 0cd8690ee523..ecf16a1cc62c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ListEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java @@ -11,17 +11,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.rcfile.RcFileDecoderUtils; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; import static java.lang.Math.toIntExact; public class ListEncoding @@ -39,7 +37,7 @@ public ListEncoding(Type type, BinaryColumnEncoding elementEncoding) public void encodeValue(Block block, int position, SliceOutput output) { Block list = block.getObject(position, Block.class); - writeVInt(output, list.getPositionCount()); + RcFileDecoderUtils.writeVInt(output, list.getPositionCount()); // write null bits int nullByte = 0; @@ -66,8 +64,8 @@ public void encodeValue(Block block, int position, SliceOutput output) public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { // entries in list - int entries = toIntExact(readVInt(slice, offset)); - offset += decodeVIntSize(slice.getByte(offset)); + int entries = toIntExact(RcFileDecoderUtils.readVInt(slice, offset)); + offset += RcFileDecoderUtils.decodeVIntSize(slice.getByte(offset)); // null bytes int nullByteCur = offset; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java similarity index 78% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/LongEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java index 622bf0d5592d..d621fd5b6669 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/LongEncoding.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java @@ -11,20 +11,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.RcFileDecoderUtils; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVLong; - public class LongEncoding implements BinaryColumnEncoding { @@ -40,7 +37,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut { for (int position = 0; position < block.getPositionCount(); position++) { if (!block.isNull(position)) { - writeVLong(output, type.getLong(block, position)); + RcFileDecoderUtils.writeVLong(output, type.getLong(block, position)); } encodeOutput.closeEntry(); } @@ -49,7 +46,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut @Override public void encodeValueInto(Block block, int position, SliceOutput output) { - writeVLong(output, type.getLong(block, position)); + RcFileDecoderUtils.writeVLong(output, type.getLong(block, position)); } @Override @@ -66,7 +63,7 @@ public Block decodeColumn(ColumnData columnData) builder.appendNull(); } else { - type.writeLong(builder, readVInt(slice, offset, length)); + type.writeLong(builder, RcFileDecoderUtils.readVInt(slice, offset, length)); } } return builder.build(); @@ -81,12 +78,12 @@ public int getValueOffset(Slice slice, int offset) @Override public int 
getValueLength(Slice slice, int offset) { - return decodeVIntSize(slice, offset); + return RcFileDecoderUtils.decodeVIntSize(slice, offset); } @Override public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { - type.writeLong(builder, readVInt(slice, offset, length)); + type.writeLong(builder, RcFileDecoderUtils.readVInt(slice, offset, length)); } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java similarity index 92% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/MapEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java index 88c049dfa31a..1162a1a3025a 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/MapEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java @@ -11,19 +11,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.rcfile.RcFileDecoderUtils; import io.trino.spi.StandardErrorCode; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; import static java.lang.Math.toIntExact; public class MapEncoding @@ -45,7 +43,7 @@ public void encodeValue(Block block, int position, SliceOutput output) Block map = block.getObject(position, Block.class); // write entry count - writeVInt(output, map.getPositionCount() / 2); + RcFileDecoderUtils.writeVInt(output, map.getPositionCount() / 2); // write null bits int nullByte = 0b0101_0101; @@ -86,8 +84,8 @@ public void encodeValue(Block block, int position, SliceOutput output) public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { // entries in list - int entries = toIntExact(readVInt(slice, offset)); - offset += decodeVIntSize(slice.getByte(offset)); + int entries = toIntExact(RcFileDecoderUtils.readVInt(slice, offset)); + offset += RcFileDecoderUtils.decodeVIntSize(slice.getByte(offset)); // null bytes int nullByteCur = offset; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ShortEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ShortEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java index 7269e942afd4..1db68d26381d 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/ShortEncoding.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java similarity index 88% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StringEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java index edb2b387caba..df528a00f938 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StringEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java @@ -11,21 +11,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; import static io.airlift.slice.Slices.EMPTY_SLICE; -import static io.trino.rcfile.RcFileDecoderUtils.calculateTruncationLength; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.calculateTruncationLength; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; import static java.lang.Math.toIntExact; public class StringEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StructEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java index 9f7d48145fc9..c45f2cf210fd 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/StructEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java similarity index 88% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java index 2f7d7134b902..ef0e8ba2c75e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/binary/TimestampEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java @@ -11,15 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile.binary; +package io.trino.hive.formats.rcfile.binary; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.RcFileDecoderUtils; +import io.trino.hive.formats.rcfile.TimestampHolder; import io.trino.plugin.base.type.DecodedTimestamp; import io.trino.plugin.base.type.TrinoTimestampEncoder; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; -import io.trino.rcfile.TimestampHolder; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.TimestampType; @@ -29,10 +30,6 @@ import static io.airlift.slice.SizeOf.SIZE_OF_INT; import static io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder; -import static io.trino.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.rcfile.RcFileDecoderUtils.isNegativeVInt; -import static io.trino.rcfile.RcFileDecoderUtils.readVInt; -import static 
io.trino.rcfile.RcFileDecoderUtils.writeVInt; import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_SECOND; import static java.util.Objects.requireNonNull; @@ -100,12 +97,12 @@ public int getValueLength(Slice slice, int offset) { int length = 4; if (hasNanosVInt(slice.getByte(offset))) { - int nanosVintLength = decodeVIntSize(slice, offset + 4); + int nanosVintLength = RcFileDecoderUtils.decodeVIntSize(slice, offset + 4); length += nanosVintLength; // is there extra data for "seconds" - if (isNegativeVInt(slice, offset + 4)) { - length += decodeVIntSize(slice, offset + 4 + nanosVintLength); + if (RcFileDecoderUtils.isNegativeVInt(slice, offset + 4)) { + length += RcFileDecoderUtils.decodeVIntSize(slice, offset + 4 + nanosVintLength); } } return length; @@ -136,15 +133,15 @@ private DecodedTimestamp getTimestamp(Slice slice, int offset) // this is an inline version of readVint so it can be stitched together // the code to read the seconds high bits below byte nanosFirstByte = slice.getByte(offset); - int nanosLength = decodeVIntSize(nanosFirstByte); - nanos = (int) readVInt(slice, offset, nanosLength); + int nanosLength = RcFileDecoderUtils.decodeVIntSize(nanosFirstByte); + nanos = (int) RcFileDecoderUtils.readVInt(slice, offset, nanosLength); nanos = decodeNanos(nanos); // read seconds (high 32 bits) - if (isNegativeVInt(nanosFirstByte)) { + if (RcFileDecoderUtils.isNegativeVInt(nanosFirstByte)) { // We compose the seconds field from two parts. The lowest 31 bits come from the first four // bytes. The higher-order bits come from the second VInt that follows the nanos field. 
- long highBits = readVInt(slice, offset + nanosLength); + long highBits = RcFileDecoderUtils.readVInt(slice, offset + nanosLength); seconds |= (highBits << 31); } } @@ -208,12 +205,12 @@ private static void writeTimestamp(long seconds, int nanos, SliceOutput output) if (hasSecondsHigh32 || nanosReversed != 0) { // The sign of the reversed-nanoseconds field indicates that there is a second VInt present int value = hasSecondsHigh32 ? ~nanosReversed : nanosReversed; - writeVInt(output, value); + RcFileDecoderUtils.writeVInt(output, value); } if (hasSecondsHigh32) { int secondsHigh32 = (int) (seconds >> 31); - writeVInt(output, secondsHigh32); + RcFileDecoderUtils.writeVInt(output, secondsHigh32); } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BinaryEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java index dba5c0912aea..70a201e95809 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BinaryEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BinaryEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BlockEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java similarity index 93% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BlockEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java index e4c9b486dd90..49ebb57f6ca4 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BlockEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BlockEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BooleanEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BooleanEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java index b540ff0b39d0..16fcc8d3db27 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/BooleanEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/BooleanEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DateEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java index 8ede2e33cd6d..a57ef20b00d1 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DateEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DecimalEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java index 4fff1d62ea1b..0d1b254c5243 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DecimalEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DecimalEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.DecimalType; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DoubleEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DoubleEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java index 18eda352b42c..425349c6facb 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/DoubleEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DoubleEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/FloatEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java similarity index 94% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/FloatEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java index 6f64f52d5503..0fba42219358 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/FloatEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/FloatEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/ListEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java index 140a445633ab..87f0800ac6ce 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/ListEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/ListEncoding.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/LongEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java index 280064a3f496..e9253fa8bd5e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/LongEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/LongEncoding.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/MapEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java index ed5731e47fc3..9000401c8b99 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/MapEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/MapEncoding.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.StandardErrorCode; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java similarity index 95% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StringEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java index be7a1a8bf427..76486ba7fdf7 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StringEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java @@ -11,18 +11,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.rcfile.RcFileDecoderUtils.calculateTruncationLength; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.calculateTruncationLength; public class StringEncoding implements TextColumnEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StructEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java index d70941058fc8..560cbdfda5c8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/StructEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StructEncoding.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java similarity index 86% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java index 91d2890bead5..4f3da2c52ac0 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextColumnEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextColumnEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.rcfile.ColumnEncoding; -import io.trino.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.ColumnEncoding; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java index 8186ee23d366..4eb1781ab642 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TextRcFileEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TextRcFileEncoding.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.Slices; -import io.trino.rcfile.ColumnEncoding; -import io.trino.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.ColumnEncoding; +import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java similarity index 96% rename from lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TimestampEncoding.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java index 2230fdf7bb6b..a4630a42b11b 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/rcfile/text/TimestampEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/TimestampEncoding.java @@ -11,15 +11,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile.text; +package io.trino.hive.formats.rcfile.text; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.rcfile.ColumnData; +import io.trino.hive.formats.rcfile.EncodeOutput; +import io.trino.hive.formats.rcfile.TimestampHolder; import io.trino.plugin.base.type.DecodedTimestamp; import io.trino.plugin.base.type.TrinoTimestampEncoder; -import io.trino.rcfile.ColumnData; -import io.trino.rcfile.EncodeOutput; -import io.trino.rcfile.TimestampHolder; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.TimestampType; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java similarity index 98% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java index 4aec0b4b75b9..5df8dfb866b5 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/AbstractTestRcFileReader.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/AbstractTestRcFileReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ContiguousSet; import com.google.common.collect.DiscreteDomain; @@ -32,7 +32,7 @@ import static com.google.common.collect.Iterables.cycle; import static com.google.common.collect.Iterables.limit; -import static io.trino.rcfile.RcFileTester.Format.BINARY; +import static io.trino.hive.formats.rcfile.RcFileTester.Format.BINARY; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.DateType.DATE; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java similarity index 98% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/RcFileTester.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java index d657066f9cd1..6a6698d55361 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/RcFileTester.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.AbstractIterator; import com.google.common.collect.ImmutableList; @@ -24,8 +24,8 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.trino.hadoop.HadoopNative; -import io.trino.rcfile.binary.BinaryRcFileEncoding; -import io.trino.rcfile.text.TextRcFileEncoding; +import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; +import io.trino.hive.formats.rcfile.text.TextRcFileEncoding; import io.trino.spi.Page; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; @@ -121,14 +121,14 @@ import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; -import static io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition; -import static io.trino.rcfile.RcFileTester.Compression.BZIP2; -import static io.trino.rcfile.RcFileTester.Compression.LZ4; -import static io.trino.rcfile.RcFileTester.Compression.NONE; -import static io.trino.rcfile.RcFileTester.Compression.SNAPPY; -import static io.trino.rcfile.RcFileTester.Compression.ZLIB; -import static io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION; -import static io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY; +import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.findFirstSyncPosition; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.BZIP2; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.LZ4; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.NONE; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.SNAPPY; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.ZLIB; +import static io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION; +import static 
io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.DateType.DATE; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java similarity index 98% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java index dc41c1283175..218de61a9dcf 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestBufferedOutputStreamSliceOutput.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import io.airlift.slice.Slices; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestFullRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java similarity index 95% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestFullRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java index 6d9db727169d..bfa0722f2aa3 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestFullRcFileReader.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestFullRcFileReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; public class TestFullRcFileReader extends AbstractTestRcFileReader diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java similarity index 98% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java index 070965c6b2dd..876d9cd17d21 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileDecoderUtils.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReader.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java similarity index 95% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReader.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java index c975c4a7d4b3..2cd920674543 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReader.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; public class TestRcFileReader extends AbstractTestRcFileReader diff --git a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java similarity index 99% rename from lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java index e87f4527aaac..2ce0cdc80398 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/rcfile/TestRcFileReaderManual.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.rcfile; +package io.trino.hive.formats.rcfile; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -19,7 +19,7 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; -import io.trino.rcfile.binary.BinaryRcFileEncoding; +import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.spi.block.Block; import org.joda.time.DateTimeZone; import org.testng.annotations.Test; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java index 50ec569d9024..364de626a182 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java @@ -16,11 +16,11 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.CountingOutputStream; import io.airlift.slice.OutputStreamSliceOutput; -import io.trino.rcfile.AircompressorCodecFactory; -import 
io.trino.rcfile.HadoopCodecFactory; -import io.trino.rcfile.RcFileDataSource; -import io.trino.rcfile.RcFileEncoding; -import io.trino.rcfile.RcFileWriter; +import io.trino.hive.formats.rcfile.AircompressorCodecFactory; +import io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.rcfile.RcFileDataSource; +import io.trino.hive.formats.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.RcFileWriter; import io.trino.spi.Page; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java index 61672051aebe..224f8dc6b56a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java @@ -15,12 +15,12 @@ import com.google.common.collect.ImmutableMap; import io.trino.hdfs.HdfsEnvironment; +import io.trino.hive.formats.rcfile.RcFileDataSource; +import io.trino.hive.formats.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.StorageFormat; import io.trino.plugin.hive.rcfile.HdfsRcFileDataSource; -import io.trino.rcfile.RcFileDataSource; -import io.trino.rcfile.RcFileEncoding; -import io.trino.rcfile.binary.BinaryRcFileEncoding; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.type.Type; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java index 6def5227717e..822cef0cddcf 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java @@ -13,9 +13,9 @@ */ package io.trino.plugin.hive.rcfile; +import io.trino.hive.formats.rcfile.RcFileDataSource; +import io.trino.hive.formats.rcfile.RcFileDataSourceId; import io.trino.plugin.hive.FileFormatDataSourceStats; -import io.trino.rcfile.RcFileDataSource; -import io.trino.rcfile.RcFileDataSourceId; import org.apache.hadoop.fs.FSDataInputStream; import java.io.IOException; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java index 918ff1942236..48bc5309f174 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java @@ -15,10 +15,10 @@ import com.google.common.collect.ImmutableList; import io.airlift.units.DataSize; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.RcFileReader; import io.trino.plugin.hive.HiveColumnHandle; import io.trino.plugin.hive.HiveType; -import io.trino.rcfile.RcFileCorruptionException; -import io.trino.rcfile.RcFileReader; import io.trino.spi.Page; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 90af2d161ae2..940c4006bad7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -20,6 +20,16 @@ import io.airlift.units.DataSize.Unit; import io.trino.hdfs.FSDataInputStreamTail; import io.trino.hdfs.HdfsEnvironment; +import io.trino.hive.formats.rcfile.AircompressorCodecFactory; +import 
io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.rcfile.MemoryRcFileDataSource; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; +import io.trino.hive.formats.rcfile.RcFileDataSource; +import io.trino.hive.formats.rcfile.RcFileDataSourceId; +import io.trino.hive.formats.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.RcFileReader; +import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; +import io.trino.hive.formats.rcfile.text.TextRcFileEncoding; import io.trino.plugin.hive.AcidInfo; import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.HiveColumnHandle; @@ -29,16 +39,6 @@ import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; import io.trino.plugin.hive.acid.AcidTransaction; -import io.trino.rcfile.AircompressorCodecFactory; -import io.trino.rcfile.HadoopCodecFactory; -import io.trino.rcfile.MemoryRcFileDataSource; -import io.trino.rcfile.RcFileCorruptionException; -import io.trino.rcfile.RcFileDataSource; -import io.trino.rcfile.RcFileDataSourceId; -import io.trino.rcfile.RcFileEncoding; -import io.trino.rcfile.RcFileReader; -import io.trino.rcfile.binary.BinaryRcFileEncoding; -import io.trino.rcfile.text.TextRcFileEncoding; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.connector.ConnectorSession; @@ -65,6 +65,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.nullToEmpty; import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.hive.formats.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE; +import static io.trino.hive.formats.rcfile.text.TextRcFileEncoding.getDefaultSeparators; import static io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA; import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT; import static 
io.trino.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA; @@ -82,8 +84,6 @@ import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_NULL_FORMAT; -import static io.trino.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE; -import static io.trino.rcfile.text.TextRcFileEncoding.getDefaultSeparators; import static java.lang.Math.min; import static java.lang.Math.toIntExact; import static java.lang.String.format; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java index b30ea5c6053f..b16adda5e896 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java @@ -17,6 +17,12 @@ import io.airlift.slice.OutputStreamSliceOutput; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; +import io.trino.hive.formats.rcfile.AircompressorCodecFactory; +import io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.rcfile.RcFileEncoding; +import io.trino.hive.formats.rcfile.RcFileWriter; +import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; +import io.trino.hive.formats.rcfile.text.TextRcFileEncoding; import io.trino.orc.OrcReaderOptions; import io.trino.orc.OrcWriter; import io.trino.orc.OrcWriterOptions; @@ -37,12 +43,6 @@ import io.trino.plugin.hive.parquet.ParquetPageSourceFactory; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hive.rcfile.RcFilePageSourceFactory; -import io.trino.rcfile.AircompressorCodecFactory; -import io.trino.rcfile.HadoopCodecFactory; -import io.trino.rcfile.RcFileEncoding; -import 
io.trino.rcfile.RcFileWriter; -import io.trino.rcfile.binary.BinaryRcFileEncoding; -import io.trino.rcfile.text.TextRcFileEncoding; import io.trino.spi.Page; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.type.Type; From 276630ffdfcdaef8b6c59326f14e8a33fd08276f Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sat, 27 Aug 2022 17:01:04 -0700 Subject: [PATCH 03/10] Extract compression code from RCFile code --- .../AircompressorCodecFactory.java | 12 ++++++------ .../AircompressorCompressor.java | 7 +++---- .../AircompressorDecompressor.java | 5 +++-- .../BufferedOutputStreamSliceOutput.java | 2 +- .../ChunkedSliceOutput.java | 2 +- .../CodecFactory.java} | 8 ++++---- .../Compressor.java} | 4 ++-- .../Decompressor.java} | 5 +++-- .../HadoopCodecFactory.java | 8 ++++---- .../{rcfile => compression}/HadoopCompressor.java | 7 +++---- .../HadoopDecompressor.java | 8 ++++---- .../{rcfile => compression}/NoneCompressor.java | 6 +++--- .../trino/hive/formats/rcfile/RcFileReader.java | 14 ++++++++------ .../trino/hive/formats/rcfile/RcFileWriter.java | 15 +++++++++------ .../TestBufferedOutputStreamSliceOutput.java | 2 +- .../trino/hive/formats/rcfile/RcFileTester.java | 2 ++ .../formats/rcfile/TestRcFileReaderManual.java | 9 ++++++--- .../io/trino/plugin/hive/RcFileFileWriter.java | 4 ++-- .../hive/rcfile/RcFilePageSourceFactory.java | 4 ++-- .../hive/benchmark/StandardFileFormats.java | 4 ++-- 20 files changed, 69 insertions(+), 59 deletions(-) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/AircompressorCodecFactory.java (89%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/AircompressorCompressor.java (93%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/AircompressorDecompressor.java (91%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/BufferedOutputStreamSliceOutput.java (99%) 
rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/ChunkedSliceOutput.java (99%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile/RcFileCodecFactory.java => compression/CodecFactory.java} (74%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile/RcFileCompressor.java => compression/Compressor.java} (98%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile/RcFileDecompressor.java => compression/Decompressor.java} (84%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/HadoopCodecFactory.java (91%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/HadoopCompressor.java (93%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/HadoopDecompressor.java (90%) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile => compression}/NoneCompressor.java (93%) rename lib/trino-hive-formats/src/test/java/io/trino/hive/formats/{rcfile => compression}/TestBufferedOutputStreamSliceOutput.java (98%) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java similarity index 89% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java index b33bd764a9e5..f5cacb05475c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.compress.gzip.JdkGzipCodec; import io.airlift.compress.lz4.Lz4Codec; @@ -21,7 +21,7 @@ import static java.util.Objects.requireNonNull; public class AircompressorCodecFactory - implements RcFileCodecFactory + implements CodecFactory { private static final String SNAPPY_CODEC_NAME = "org.apache.hadoop.io.compress.SnappyCodec"; private static final String LZO_CODEC_NAME = "com.hadoop.compression.lzo.LzoCodec"; @@ -30,15 +30,15 @@ public class AircompressorCodecFactory private static final String LZ4_HC_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec"; private static final String GZIP_CODEC_NAME = "org.apache.hadoop.io.compress.GzipCodec"; - private final RcFileCodecFactory delegate; + private final CodecFactory delegate; - public AircompressorCodecFactory(RcFileCodecFactory delegate) + public AircompressorCodecFactory(CodecFactory delegate) { this.delegate = requireNonNull(delegate, "delegate is null"); } @Override - public RcFileCompressor createCompressor(String codecName) + public Compressor createCompressor(String codecName) { if (SNAPPY_CODEC_NAME.equals(codecName)) { return new AircompressorCompressor(new SnappyCodec()); @@ -56,7 +56,7 @@ public RcFileCompressor createCompressor(String codecName) } @Override - public RcFileDecompressor createDecompressor(String codecName) + public Decompressor createDecompressor(String codecName) { if (SNAPPY_CODEC_NAME.equals(codecName)) { return new AircompressorDecompressor(new SnappyCodec()); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java similarity index 93% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java rename to 
lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java index 39b7d62a7941..15eb3a77cf55 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java @@ -11,11 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.Compressor; import java.io.IOException; import java.io.UncheckedIOException; @@ -24,7 +23,7 @@ import static java.util.Objects.requireNonNull; public class AircompressorCompressor - implements RcFileCompressor + implements Compressor { private final CompressionCodec codec; @@ -43,7 +42,7 @@ private static class AircompressorCompressedSliceOutputSupplier implements Supplier { private final CompressionCodec codec; - private final Compressor compressor; + private final org.apache.hadoop.io.compress.Compressor compressor; private final ChunkedSliceOutput compressedOutput; public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java similarity index 91% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java index 2bce5ed4ab93..28052df28f96 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/AircompressorDecompressor.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java @@ -11,9 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionInputStream; @@ -22,7 +23,7 @@ import static java.util.Objects.requireNonNull; public class AircompressorDecompressor - implements RcFileDecompressor + implements Decompressor { private final CompressionCodec codec; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java index 7e96f0ec751e..0dcb3e009d7f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/BufferedOutputStreamSliceOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/BufferedOutputStreamSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java similarity index 99% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java index bcc71947a0cb..740c223f9ffa 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ChunkedSliceOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ChunkedSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java similarity index 74% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java index 9ae77789efa8..86121fa559a7 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; -public interface RcFileCodecFactory +public interface CodecFactory { - RcFileCompressor createCompressor(String codecName); + Compressor createCompressor(String codecName); - RcFileDecompressor createDecompressor(String codecName); + Decompressor createDecompressor(String codecName); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java similarity index 98% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java index 632a57cd1df9..acff267712ef 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; @@ -23,7 +23,7 @@ import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; -public interface RcFileCompressor +public interface Compressor { CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java similarity index 84% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java index 980110579b18..a0c2751ae0ce 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java @@ -11,11 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; -public interface RcFileDecompressor +public interface Decompressor { void decompress(Slice compressed, Slice uncompressed) throws RcFileCorruptionException; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java similarity index 91% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java index d78b02e07948..0c4d97df2e84 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.io.compress.CompressionCodec; @@ -21,7 +21,7 @@ import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; public class HadoopCodecFactory - implements RcFileCodecFactory + implements CodecFactory { private final ClassLoader classLoader; @@ -31,14 +31,14 @@ public HadoopCodecFactory(ClassLoader classLoader) } @Override - public RcFileCompressor createCompressor(String codecName) + public Compressor createCompressor(String codecName) { CompressionCodec codec = createCompressionCodec(codecName); return new HadoopCompressor(codec); } @Override - public RcFileDecompressor createDecompressor(String codecName) + public Decompressor createDecompressor(String codecName) { CompressionCodec codec = createCompressionCodec(codecName); return new HadoopDecompressor(codec); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java similarity index 93% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java index 57a8d8d17f23..91a8d148a44d 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java @@ -11,12 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.Compressor; import java.io.IOException; import java.io.UncheckedIOException; @@ -25,7 +24,7 @@ import static java.util.Objects.requireNonNull; public class HadoopCompressor - implements RcFileCompressor + implements Compressor { private final CompressionCodec codec; @@ -44,7 +43,7 @@ private static class HadoopCompressedSliceOutputSupplier implements Supplier { private final CompressionCodec codec; - private final Compressor compressor; + private final org.apache.hadoop.io.compress.Compressor compressor; private final ChunkedSliceOutput bufferedOutput; public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java similarity index 90% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java index aa50954f7c48..408e3639317e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/HadoopDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; +import io.trino.hive.formats.rcfile.RcFileCorruptionException; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionInputStream; -import org.apache.hadoop.io.compress.Decompressor; import java.io.IOException; @@ -25,10 +25,10 @@ import static java.util.Objects.requireNonNull; public class HadoopDecompressor - implements RcFileDecompressor + implements Decompressor { private final CompressionCodec codec; - private final Decompressor decompressor; + private final org.apache.hadoop.io.compress.Decompressor decompressor; private boolean destroyed; public HadoopDecompressor(CompressionCodec codec) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java similarity index 93% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java index beac358dd15c..f15065585ee2 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/NoneCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java @@ -11,12 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import java.util.function.Supplier; -class NoneCompressor - implements RcFileCompressor +public class NoneCompressor + implements Compressor { @Override public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 01f1464bd75f..62310a66fd9d 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -23,6 +23,8 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.DataSize.Unit; +import io.trino.hive.formats.compression.CodecFactory; +import io.trino.hive.formats.compression.Decompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum; import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksumBuilder; import io.trino.spi.Page; @@ -76,7 +78,7 @@ public class RcFileReader private final byte version; - private final RcFileDecompressor decompressor; + private final Decompressor decompressor; private final Map metadata; private final int columnCount; @@ -106,7 +108,7 @@ public RcFileReader( RcFileDataSource dataSource, RcFileEncoding encoding, Map readColumns, - RcFileCodecFactory codecFactory, + CodecFactory codecFactory, long offset, long length, DataSize bufferSize) @@ -119,7 +121,7 @@ private RcFileReader( RcFileDataSource dataSource, RcFileEncoding encoding, Map readColumns, - RcFileCodecFactory codecFactory, + CodecFactory codecFactory, long offset, long length, DataSize bufferSize, @@ -510,7 +512,7 @@ static void validateFile( RcFileDataSource input, RcFileEncoding encoding, List types, - RcFileCodecFactory codecFactory) + CodecFactory codecFactory) throws 
RcFileCorruptionException { ImmutableMap.Builder readTypes = ImmutableMap.builder(); @@ -541,7 +543,7 @@ static void validateFile( private static class Column { private final ColumnEncoding encoding; - private final RcFileDecompressor decompressor; + private final Decompressor decompressor; private BasicSliceInput lengthsInput; private Slice dataBuffer; @@ -557,7 +559,7 @@ private static class Column private int runLength; private int lastValueLength = -1; - public Column(ColumnEncoding encoding, RcFileDecompressor decompressor) + public Column(ColumnEncoding encoding, Decompressor decompressor) { this.encoding = encoding; this.decompressor = decompressor; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index 560e7f4e2202..6020e8ba2de9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -18,7 +18,10 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; -import io.trino.hive.formats.rcfile.RcFileCompressor.CompressedSliceOutput; +import io.trino.hive.formats.compression.CodecFactory; +import io.trino.hive.formats.compression.Compressor; +import io.trino.hive.formats.compression.Compressor.CompressedSliceOutput; +import io.trino.hive.formats.compression.NoneCompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder; import io.trino.spi.Page; import io.trino.spi.block.Block; @@ -70,7 +73,7 @@ public class RcFileWriter private final SliceOutput output; private final List types; private final RcFileEncoding encoding; - private final RcFileCodecFactory codecFactory; + private final CodecFactory codecFactory; private final long syncFirst = ThreadLocalRandom.current().nextLong(); private final long syncSecond = 
ThreadLocalRandom.current().nextLong(); @@ -94,7 +97,7 @@ public RcFileWriter( List types, RcFileEncoding encoding, Optional codecName, - RcFileCodecFactory codecFactory, + CodecFactory codecFactory, Map metadata, boolean validate) throws IOException @@ -116,7 +119,7 @@ public RcFileWriter( List types, RcFileEncoding encoding, Optional codecName, - RcFileCodecFactory codecFactory, + CodecFactory codecFactory, Map metadata, DataSize targetMinRowGroupSize, DataSize targetMaxRowGroupSize, @@ -168,7 +171,7 @@ public RcFileWriter( recordValidation(validation -> validation.setSyncSecond(syncSecond)); // initialize columns - RcFileCompressor compressor = codecName.map(codecFactory::createCompressor).orElse(new NoneCompressor()); + Compressor compressor = codecName.map(codecFactory::createCompressor).orElse(new NoneCompressor()); keySectionOutput = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); keySectionOutput.close(); // output is recycled on first use which requires output to be closed columnEncoders = new ColumnEncoder[types.size()]; @@ -340,7 +343,7 @@ private static class ColumnEncoder private boolean columnClosed; - public ColumnEncoder(ColumnEncoding columnEncoding, RcFileCompressor compressor) + public ColumnEncoder(ColumnEncoding columnEncoding, Compressor compressor) { this.columnEncoding = columnEncoding; this.output = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java similarity index 98% rename from lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java rename to 
lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java index 218de61a9dcf..f8b20a6f76c0 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestBufferedOutputStreamSliceOutput.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/compression/TestBufferedOutputStreamSliceOutput.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats.compression; import io.airlift.slice.Slice; import io.airlift.slice.Slices; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java index 6a6698d55361..028630c7233c 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java @@ -24,6 +24,8 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.trino.hadoop.HadoopNative; +import io.trino.hive.formats.compression.AircompressorCodecFactory; +import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.hive.formats.rcfile.text.TextRcFileEncoding; import io.trino.spi.Page; diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java index 2ce0cdc80398..567761a03679 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java @@ -19,6 +19,9 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import 
io.airlift.units.DataSize; +import io.trino.hive.formats.compression.CodecFactory; +import io.trino.hive.formats.compression.Compressor; +import io.trino.hive.formats.compression.Decompressor; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.spi.block.Block; import org.joda.time.DateTimeZone; @@ -292,16 +295,16 @@ public List getRowGroupSegmentOffsets() } private static class BogusRcFileCodecFactory - implements RcFileCodecFactory + implements CodecFactory { @Override - public RcFileCompressor createCompressor(String codecName) + public Compressor createCompressor(String codecName) { throw new UnsupportedOperationException(); } @Override - public RcFileDecompressor createDecompressor(String codecName) + public Decompressor createDecompressor(String codecName) { throw new UnsupportedOperationException(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java index 364de626a182..8b7a99b56285 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java @@ -16,8 +16,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.CountingOutputStream; import io.airlift.slice.OutputStreamSliceOutput; -import io.trino.hive.formats.rcfile.AircompressorCodecFactory; -import io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.compression.AircompressorCodecFactory; +import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.RcFileDataSource; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileWriter; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 
940c4006bad7..8d9370ff17b2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -20,8 +20,8 @@ import io.airlift.units.DataSize.Unit; import io.trino.hdfs.FSDataInputStreamTail; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hive.formats.rcfile.AircompressorCodecFactory; -import io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.compression.AircompressorCodecFactory; +import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.MemoryRcFileDataSource; import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.hive.formats.rcfile.RcFileDataSource; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java index b16adda5e896..a0203470d3c1 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java @@ -17,8 +17,8 @@ import io.airlift.slice.OutputStreamSliceOutput; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hive.formats.rcfile.AircompressorCodecFactory; -import io.trino.hive.formats.rcfile.HadoopCodecFactory; +import io.trino.hive.formats.compression.AircompressorCodecFactory; +import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileWriter; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; From 56bcc2a8ab575fb4e585c7aa68c81d97c291702e Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sat, 27 Aug 2022 17:35:36 -0700 Subject: [PATCH 04/10] Cleanup hive-formats compressor code --- 
.../AircompressorCodecFactory.java | 41 +++++++------------ .../AircompressorDecompressor.java | 4 +- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java index f5cacb05475c..f5e1960e8848 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java @@ -27,7 +27,6 @@ public class AircompressorCodecFactory private static final String LZO_CODEC_NAME = "com.hadoop.compression.lzo.LzoCodec"; private static final String LZO_CODEC_NAME_DEPRECATED = "org.apache.hadoop.io.compress.LzoCodec"; private static final String LZ4_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec"; - private static final String LZ4_HC_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec"; private static final String GZIP_CODEC_NAME = "org.apache.hadoop.io.compress.GzipCodec"; private final CodecFactory delegate; @@ -40,36 +39,24 @@ public AircompressorCodecFactory(CodecFactory delegate) @Override public Compressor createCompressor(String codecName) { - if (SNAPPY_CODEC_NAME.equals(codecName)) { - return new AircompressorCompressor(new SnappyCodec()); - } - if (LZO_CODEC_NAME.equals(codecName) || LZO_CODEC_NAME_DEPRECATED.equals(codecName)) { - return new AircompressorCompressor(new LzoCodec()); - } - if (LZ4_CODEC_NAME.equals(codecName)) { - return new AircompressorCompressor(new Lz4Codec()); - } - if (GZIP_CODEC_NAME.equals(codecName)) { - return new AircompressorCompressor(new JdkGzipCodec()); - } - return delegate.createCompressor(codecName); + return switch (codecName) { + case SNAPPY_CODEC_NAME -> new AircompressorCompressor(new SnappyCodec()); + case LZO_CODEC_NAME, LZO_CODEC_NAME_DEPRECATED -> new 
AircompressorCompressor(new LzoCodec()); + case LZ4_CODEC_NAME -> new AircompressorCompressor(new Lz4Codec()); + case GZIP_CODEC_NAME -> new AircompressorCompressor(new JdkGzipCodec()); + default -> delegate.createCompressor(codecName); + }; } @Override public Decompressor createDecompressor(String codecName) { - if (SNAPPY_CODEC_NAME.equals(codecName)) { - return new AircompressorDecompressor(new SnappyCodec()); - } - if (LZO_CODEC_NAME.equals(codecName) || LZO_CODEC_NAME_DEPRECATED.equals(codecName)) { - return new AircompressorDecompressor(new LzoCodec()); - } - if (LZ4_CODEC_NAME.equals(codecName) || LZ4_HC_CODEC_NAME.equals(codecName)) { - return new AircompressorDecompressor(new Lz4Codec()); - } - if (GZIP_CODEC_NAME.equals(codecName)) { - return new AircompressorDecompressor(new JdkGzipCodec()); - } - return delegate.createDecompressor(codecName); + return switch (codecName) { + case SNAPPY_CODEC_NAME -> new AircompressorDecompressor(new SnappyCodec()); + case LZO_CODEC_NAME, LZO_CODEC_NAME_DEPRECATED -> new AircompressorDecompressor(new LzoCodec()); + case LZ4_CODEC_NAME -> new AircompressorDecompressor(new Lz4Codec()); + case GZIP_CODEC_NAME -> new AircompressorDecompressor(new JdkGzipCodec()); + default -> delegate.createDecompressor(codecName); + }; } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java index 28052df28f96..100ea9a76edb 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java @@ -45,7 +45,5 @@ public void decompress(Slice compressed, Slice uncompressed) } @Override - public void destroy() - { - } + public void destroy() {} } From ccba5a5f8c4409f22b53c78bd8e891e4cce2b4e9 Mon Sep 17 00:00:00 2001 From: Dain 
Sundstrom Date: Sun, 28 Aug 2022 14:35:10 -0700 Subject: [PATCH 05/10] Fix IntelliJ warnings in RCFile code --- .../compression/HadoopCodecFactory.java | 4 ++-- .../hive/formats/rcfile/RcFileReader.java | 5 ++--- .../formats/rcfile/RcFileWriteValidation.java | 21 +++++++------------ .../hive/formats/rcfile/RcFileWriter.java | 2 +- .../hive/formats/rcfile/TimestampHolder.java | 2 +- .../hive/formats/rcfile/ValidationHash.java | 2 +- .../formats/rcfile/binary/BinaryEncoding.java | 2 +- .../rcfile/binary/BinaryRcFileEncoding.java | 2 +- .../formats/rcfile/binary/ShortEncoding.java | 2 +- .../formats/rcfile/binary/StringEncoding.java | 4 ++-- .../formats/rcfile/binary/StructEncoding.java | 3 +-- .../rcfile/binary/TimestampEncoding.java | 4 ++-- .../formats/rcfile/text/DateEncoding.java | 1 - 13 files changed, 22 insertions(+), 32 deletions(-) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java index 0c4d97df2e84..dfeeb10f20aa 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java @@ -52,8 +52,8 @@ private CompressionCodec createCompressionCodec(String codecName) constructor.setAccessible(true); CompressionCodec codec = constructor.newInstance(); if (codec instanceof Configurable) { - // Hadoop is crazy... you have to give codecs an empty configuration or they throw NPEs - // but you need to make sure the configuration doesn't "load" defaults or it spends + // Hadoop is crazy... 
you have to give codecs an empty configuration, or they throw NPEs, + // but you need to make sure the configuration doesn't "load" defaults, or it spends // forever loading XML with no useful information ((Configurable) codec).setConf(newEmptyConfiguration()); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 62310a66fd9d..cc6fbb0ca0c6 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -199,6 +199,7 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // get column count from metadata String columnCountString = metadata.get(COLUMN_COUNT_METADATA_KEY); + verify(columnCountString != null, "Column count not specified in metadata RCFile %s", dataSource); try { columnCount = Integer.parseInt(columnCountString); } @@ -490,9 +491,7 @@ private void validateWrite(Predicate test, String message private void validateWriteRowGroupChecksum() { - if (writeChecksumBuilder.isPresent()) { - writeChecksumBuilder.get().addRowGroup(rowGroupRowCount); - } + writeChecksumBuilder.ifPresent(checksumBuilder -> checksumBuilder.addRowGroup(rowGroupRowCount)); } private void validateWritePageChecksum() diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java index 2deed4a6fbc7..668c43efd4dd 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriteValidation.java @@ -200,46 +200,39 @@ public RcFileWriteValidationBuilder(List types) this.checksum = new WriteChecksumBuilder(types); } - public RcFileWriteValidationBuilder setVersion(byte version) + 
public void setVersion(byte version) { this.version = version; - return this; } - public RcFileWriteValidationBuilder addMetadataProperty(String key, String value) + public void addMetadataProperty(String key, String value) { metadata.put(key, value); - return this; } - public RcFileWriteValidationBuilder setCodecClassName(Optional codecClassName) + public void setCodecClassName(Optional codecClassName) { this.codecClassName = codecClassName; - return this; } - public RcFileWriteValidationBuilder setSyncFirst(long syncFirst) + public void setSyncFirst(long syncFirst) { this.syncFirst = syncFirst; - return this; } - public RcFileWriteValidationBuilder setSyncSecond(long syncSecond) + public void setSyncSecond(long syncSecond) { this.syncSecond = syncSecond; - return this; } - public RcFileWriteValidationBuilder addRowGroup(int rowCount) + public void addRowGroup(int rowCount) { checksum.addRowGroup(rowCount); - return this; } - public RcFileWriteValidationBuilder addPage(Page page) + public void addPage(Page page) { checksum.addPage(page); - return this; } public RcFileWriteValidation build() diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index 6020e8ba2de9..e8b6962e5bd6 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -173,7 +173,7 @@ public RcFileWriter( // initialize columns Compressor compressor = codecName.map(codecFactory::createCompressor).orElse(new NoneCompressor()); keySectionOutput = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); - keySectionOutput.close(); // output is recycled on first use which requires output to be closed + keySectionOutput.close(); // output is recycled on first use which requires the output to be closed 
columnEncoders = new ColumnEncoder[types.size()]; for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) { Type type = types.get(columnIndex); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java index 9515602364d5..cb0f38341984 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/TimestampHolder.java @@ -33,7 +33,7 @@ public final class TimestampHolder private final long seconds; private final int nanosOfSecond; - public TimestampHolder(long epochMicros, int picosOfMicro) + private TimestampHolder(long epochMicros, int picosOfMicro) { this.seconds = floorDiv(epochMicros, MICROSECONDS_PER_SECOND); long picosOfSecond = (long) floorMod(epochMicros, MICROSECONDS_PER_SECOND) * PICOSECONDS_PER_MICROSECOND + picosOfMicro; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java index 1ead4db8192d..dd1f992b0fc5 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/ValidationHash.java @@ -59,7 +59,7 @@ class ValidationHash } } - // This should really come from the environment, but there is not good way to get a value here + // This should really come from the environment, but there is no good way to get a value here private static final TypeOperators VALIDATION_TYPE_OPERATORS_CACHE = new TypeOperators(); public static ValidationHash createValidationHash(Type type) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java index 
e7829a69162f..1d882e344df2 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java @@ -55,7 +55,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut public void encodeValueInto(Block block, int position, SliceOutput output) { Slice slice = type.getSlice(block, position); - // Note binary nested in complex structures do no use the empty marker. + // Note binary nested in complex structures do not use the empty marker. // Therefore, empty VARBINARY values are ok. writeVInt(output, slice.length()); output.writeBytes(slice); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java index 1f77eaa1cb11..60fd3760b832 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryRcFileEncoding.java @@ -127,7 +127,7 @@ public ColumnEncoding structEncoding(Type type, List fieldEncodi return new StructEncoding( type, fieldEncodings.stream() - .map(field -> (BinaryColumnEncoding) field) + .map(BinaryColumnEncoding.class::cast) .collect(Collectors.toList())); } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java index 1db68d26381d..d3b8a2ccecbc 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ShortEncoding.java @@ -62,7 +62,7 @@ public Block decodeColumn(ColumnData columnData) int length = columnData.getLength(i); if (length != 0) { 
checkState(length == SIZE_OF_SHORT, "Short should be 2 bytes"); - type.writeLong(builder, (long) Short.reverseBytes(slice.getShort(columnData.getOffset(i)))); + type.writeLong(builder, Short.reverseBytes(slice.getShort(columnData.getOffset(i)))); } else { builder.appendNull(); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java index df528a00f938..ac5c335685a0 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java @@ -61,7 +61,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut public void encodeValueInto(Block block, int position, SliceOutput output) { Slice slice = type.getSlice(block, position); - // Note strings nested in complex structures do no use the empty string marker + // Note strings nested in complex structures do not use the empty string marker writeVInt(output, slice.length()); output.writeBytes(slice); } @@ -107,7 +107,7 @@ public int getValueLength(Slice slice, int offset) @Override public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { - // Note strings nested in complex structures do no use the empty string marker + // Note strings nested in complex structures do not use the empty string marker length = calculateTruncationLength(type, slice, offset, length); type.writeSlice(builder, slice, offset, length); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java index c45f2cf210fd..1532be23fad9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StructEncoding.java @@ -89,8 +89,7 @@ public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int l fieldId++; } - // Some times a struct does not have all fields written - // so we fill with nulls + // Sometimes a struct does not have all fields written, so we fill with nulls while (fieldId < structFields.size()) { rowBuilder.appendNull(); fieldId++; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java index ef0e8ba2c75e..924aebb2fc0a 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java @@ -120,7 +120,7 @@ private static boolean hasNanosVInt(byte b) return (b >> 7) != 0; } - private DecodedTimestamp getTimestamp(Slice slice, int offset) + private static DecodedTimestamp getTimestamp(Slice slice, int offset) { // read seconds (low 32 bits) int lowest31BitsOfSecondsAndFlag = Integer.reverseBytes(slice.getInt(offset)); @@ -130,7 +130,7 @@ private DecodedTimestamp getTimestamp(Slice slice, int offset) int nanos = 0; if (lowest31BitsOfSecondsAndFlag < 0) { // read nanos - // this is an inline version of readVint so it can be stitched together + // this is an inline version of readVint, so it can be stitched together // the code to read the seconds high bits below byte nanosFirstByte = slice.getByte(offset); int nanosLength = RcFileDecoderUtils.decodeVIntSize(nanosFirstByte); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java index a57ef20b00d1..e1bb3f9628d8 100644 --- 
a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/DateEncoding.java @@ -88,7 +88,6 @@ public Block decodeColumn(ColumnData columnData) builder.appendNull(); } else { - //noinspection deprecation type.writeLong(builder, parseDate(slice, offset, length)); } } From b4ff111f71874208ce568e5362caace4047166da Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sun, 28 Aug 2022 13:32:00 -0700 Subject: [PATCH 06/10] Improve safety of RCFile compressed header buffer Slice RCFile compressed header buffer to exact size to avoid future bugs where a compression format attempts to over read the actual compressed data region. --- .../java/io/trino/hive/formats/rcfile/RcFileReader.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index cc6fbb0ca0c6..397458e8f6e8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -362,7 +362,9 @@ else if (rowsRead > 0) { if (compressedHeaderSize > compressedHeaderBuffer.length()) { compressedHeaderBuffer = Slices.allocate(compressedHeaderSize); } - input.readBytes(compressedHeaderBuffer, 0, compressedHeaderSize); + // use exact sized compressed header to avoid problems where compression algorithms over read + Slice compressedHeader = compressedHeaderBuffer.slice(0, compressedHeaderSize); + input.readBytes(compressedHeader); // decompress row group header Slice header; @@ -372,13 +374,13 @@ else if (rowsRead > 0) { } Slice buffer = headerBuffer.slice(0, uncompressedHeaderSize); - decompressor.decompress(compressedHeaderBuffer, buffer); + decompressor.decompress(compressedHeader, buffer); header = 
buffer; } else { verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", dataSource.getId()); - header = compressedHeaderBuffer; + header = compressedHeader; } BasicSliceInput headerInput = header.getInput(); From fac9b8173627e806d116b213a7de76991f83d8a3 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sun, 28 Aug 2022 11:38:35 -0700 Subject: [PATCH 07/10] Extract MemoryCompressedSliceOutput --- .../compression/AircompressorCompressor.java | 8 +- .../hive/formats/compression/Compressor.java | 92 +-------------- .../formats/compression/HadoopCompressor.java | 8 +- .../MemoryCompressedSliceOutput.java | 108 ++++++++++++++++++ .../formats/compression/NoneCompressor.java | 8 +- .../hive/formats/rcfile/RcFileWriter.java | 6 +- 6 files changed, 124 insertions(+), 106 deletions(-) create mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java index 15eb3a77cf55..af24253aa354 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java @@ -33,13 +33,13 @@ public AircompressorCompressor(CompressionCodec codec) } @Override - public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) + public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) { return new AircompressorCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); } private static class AircompressorCompressedSliceOutputSupplier - implements Supplier + implements Supplier { private final CompressionCodec codec; private final org.apache.hadoop.io.compress.Compressor compressor; @@ 
-53,13 +53,13 @@ public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int mi } @Override - public CompressedSliceOutput get() + public MemoryCompressedSliceOutput get() { try { compressor.reset(); compressedOutput.reset(); CompressionOutputStream compressionStream = codec.createOutputStream(compressedOutput, compressor); - return new CompressedSliceOutput(compressionStream, compressedOutput, this, () -> {}); + return new MemoryCompressedSliceOutput(compressionStream, compressedOutput, this, () -> {}); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java index acff267712ef..70f742b18117 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java @@ -13,97 +13,7 @@ */ package io.trino.hive.formats.compression; -import io.airlift.slice.Slice; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.List; -import java.util.function.Supplier; - -import static com.google.common.base.Preconditions.checkState; -import static java.util.Objects.requireNonNull; - public interface Compressor { - CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize); - - // This specialized SliceOutput has direct access buffered output slices to - // report buffer sizes and to get he final output. 
Additionally, a new - // CompressedSliceOutput can be created that reuses the underlying output - // buffer - final class CompressedSliceOutput - extends BufferedOutputStreamSliceOutput - { - private final ChunkedSliceOutput bufferedOutput; - private final Supplier resetFactory; - private final Runnable onDestroy; - private boolean closed; - private boolean destroyed; - - /** - * @param compressionStream the compressed output stream to delegate to - * @param bufferedOutput the output for the compressionStream - * @param resetFactory the function to create a new CompressedSliceOutput that reuses the bufferedOutput - * @param onDestroy used to cleanup the compression when done - */ - public CompressedSliceOutput(OutputStream compressionStream, ChunkedSliceOutput bufferedOutput, Supplier resetFactory, Runnable onDestroy) - { - super(compressionStream); - this.bufferedOutput = requireNonNull(bufferedOutput, "bufferedOutput is null"); - this.resetFactory = requireNonNull(resetFactory, "resetFactory is null"); - this.onDestroy = requireNonNull(onDestroy, "onDestroy is null"); - } - - @Override - public long getRetainedSize() - { - return super.getRetainedSize() + bufferedOutput.getRetainedSize(); - } - - public int getCompressedSize() - { - checkState(closed, "Stream has not been closed"); - checkState(!destroyed, "Stream has been destroyed"); - return bufferedOutput.size(); - } - - public List getCompressedSlices() - { - checkState(closed, "Stream has not been closed"); - checkState(!destroyed, "Stream has been destroyed"); - return bufferedOutput.getSlices(); - } - - public CompressedSliceOutput createRecycledCompressedSliceOutput() - { - checkState(closed, "Stream has not been closed"); - checkState(!destroyed, "Stream has been destroyed"); - destroyed = true; - return resetFactory.get(); - } - - @Override - public void close() - throws IOException - { - if (!closed) { - closed = true; - super.close(); - } - } - - public void destroy() - throws IOException - { - if 
(!destroyed) { - destroyed = true; - try { - close(); - } - finally { - onDestroy.run(); - } - } - } - } + MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java index 91a8d148a44d..d3f1352783af 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java @@ -34,13 +34,13 @@ public HadoopCompressor(CompressionCodec codec) } @Override - public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) + public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) { return new HadoopCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); } private static class HadoopCompressedSliceOutputSupplier - implements Supplier + implements Supplier { private final CompressionCodec codec; private final org.apache.hadoop.io.compress.Compressor compressor; @@ -54,13 +54,13 @@ public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkS } @Override - public CompressedSliceOutput get() + public MemoryCompressedSliceOutput get() { try { compressor.reset(); bufferedOutput.reset(); CompressionOutputStream compressionStream = codec.createOutputStream(bufferedOutput, compressor); - return new CompressedSliceOutput(compressionStream, bufferedOutput, this, () -> CodecPool.returnCompressor(compressor)); + return new MemoryCompressedSliceOutput(compressionStream, bufferedOutput, this, () -> CodecPool.returnCompressor(compressor)); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java new file mode 100644 index 000000000000..a7975d4b729a --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java @@ -0,0 +1,108 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats.compression; + +import io.airlift.slice.Slice; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; +import java.util.function.Supplier; + +import static com.google.common.base.Preconditions.checkState; +import static java.util.Objects.requireNonNull; + +// This specialized SliceOutput has direct access to the buffered output slices to +// report buffer sizes and to get the final output.
Additionally, a new +// MemoryCompressedSliceOutput can be created that reuses the underlying output +// buffer +public final class MemoryCompressedSliceOutput + extends BufferedOutputStreamSliceOutput +{ + private final ChunkedSliceOutput bufferedOutput; + private final Supplier resetFactory; + private final Runnable onDestroy; + private boolean closed; + private boolean destroyed; + + /** + * @param compressionStream the compressed output stream to delegate to + * @param bufferedOutput the output for the compressionStream + * @param resetFactory the function to create a new MemoryCompressedSliceOutput that reuses the bufferedOutput + * @param onDestroy used to clean up the compression when done + */ + public MemoryCompressedSliceOutput( + OutputStream compressionStream, + ChunkedSliceOutput bufferedOutput, + Supplier resetFactory, + Runnable onDestroy) + { + super(compressionStream); + this.bufferedOutput = requireNonNull(bufferedOutput, "bufferedOutput is null"); + this.resetFactory = requireNonNull(resetFactory, "resetFactory is null"); + this.onDestroy = requireNonNull(onDestroy, "onDestroy is null"); + } + + @Override + public long getRetainedSize() + { + return super.getRetainedSize() + bufferedOutput.getRetainedSize(); + } + + public int getCompressedSize() + { + checkState(closed, "Stream has not been closed"); + checkState(!destroyed, "Stream has been destroyed"); + return bufferedOutput.size(); + } + + public List getCompressedSlices() + { + checkState(closed, "Stream has not been closed"); + checkState(!destroyed, "Stream has been destroyed"); + return bufferedOutput.getSlices(); + } + + public MemoryCompressedSliceOutput createRecycledCompressedSliceOutput() + { + checkState(closed, "Stream has not been closed"); + checkState(!destroyed, "Stream has been destroyed"); + destroyed = true; + return resetFactory.get(); + } + + @Override + public void close() + throws IOException + { + if (!closed) { + closed = true; + super.close(); + } + } + + public void destroy() +
throws IOException + { + if (!destroyed) { + destroyed = true; + try { + close(); + } + finally { + onDestroy.run(); + } + } + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java index f15065585ee2..efc5450b01c5 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java @@ -19,13 +19,13 @@ public class NoneCompressor implements Compressor { @Override - public CompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) + public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) { return new NoneCompressedSliceOutputSupplier(minChunkSize, maxChunkSize).get(); } private static class NoneCompressedSliceOutputSupplier - implements Supplier + implements Supplier { private final ChunkedSliceOutput chunkedSliceOutput; @@ -35,10 +35,10 @@ private NoneCompressedSliceOutputSupplier(int minChunkSize, int maxChunkSize) } @Override - public CompressedSliceOutput get() + public MemoryCompressedSliceOutput get() { chunkedSliceOutput.reset(); - return new CompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {}); + return new MemoryCompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {}); } } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index e8b6962e5bd6..18c185cffe7a 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -20,7 +20,7 @@ import io.airlift.units.DataSize; import 
io.trino.hive.formats.compression.CodecFactory; import io.trino.hive.formats.compression.Compressor; -import io.trino.hive.formats.compression.Compressor.CompressedSliceOutput; +import io.trino.hive.formats.compression.MemoryCompressedSliceOutput; import io.trino.hive.formats.compression.NoneCompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder; import io.trino.spi.Page; @@ -78,7 +78,7 @@ public class RcFileWriter private final long syncFirst = ThreadLocalRandom.current().nextLong(); private final long syncSecond = ThreadLocalRandom.current().nextLong(); - private CompressedSliceOutput keySectionOutput; + private MemoryCompressedSliceOutput keySectionOutput; private final ColumnEncoder[] columnEncoders; private final int targetMinRowGroupSize; @@ -339,7 +339,7 @@ private static class ColumnEncoder private final SliceOutput lengthOutput = new DynamicSliceOutput(512); - private CompressedSliceOutput output; + private MemoryCompressedSliceOutput output; private boolean columnClosed; From 2638481d86a4359af4e77c030559751b1865bc0f Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Wed, 18 Jan 2023 09:48:12 -0800 Subject: [PATCH 08/10] Simplify compression interfaces --- .../compression/AircompressorCodec.java | 146 +++++++++++++++ .../AircompressorCodecFactory.java | 62 ------- .../compression/AircompressorCompressor.java | 69 ------- .../AircompressorDecompressor.java | 49 ----- .../{Compressor.java => Codec.java} | 19 +- .../formats/compression/CompressionKind.java | 137 ++++++++++++++ .../hive/formats/compression/HadoopCodec.java | 175 ++++++++++++++++++ .../compression/HadoopCodecFactory.java | 66 ------- .../formats/compression/HadoopCompressor.java | 70 ------- .../compression/HadoopDecompressor.java | 63 ------- .../MemoryCompressedSliceOutput.java | 23 +++ .../formats/compression/NoneCompressor.java | 44 ----- ...CodecFactory.java => ValueCompressor.java} | 14 +- ...compressor.java => ValueDecompressor.java} | 12 +- 
.../hive/formats/rcfile/RcFileReader.java | 22 +-- .../hive/formats/rcfile/RcFileWriter.java | 48 ++--- .../hive/formats/rcfile/RcFileTester.java | 71 +++---- .../rcfile/TestRcFileReaderManual.java | 20 -- .../plugin/hive/HiveCompressionCodec.java | 23 +-- .../trino/plugin/hive/RcFileFileWriter.java | 8 +- .../plugin/hive/RcFileFileWriterFactory.java | 6 +- .../hive/rcfile/RcFilePageSourceFactory.java | 3 - .../hive/util/CompressionConfigUtil.java | 6 +- .../plugin/hive/TestHiveFileFormats.java | 12 +- .../hive/benchmark/StandardFileFormats.java | 5 +- 25 files changed, 596 insertions(+), 577 deletions(-) create mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/{Compressor.java => Codec.java} (53%) create mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java create mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/{CodecFactory.java => ValueCompressor.java} (70%) 
rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/{Decompressor.java => ValueDecompressor.java} (78%) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java new file mode 100644 index 000000000000..a5b4d3a46a16 --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodec.java @@ -0,0 +1,146 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.hive.formats.compression; + +import io.airlift.slice.DynamicSliceOutput; +import io.airlift.slice.Slice; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.util.function.Supplier; + +import static java.util.Objects.requireNonNull; + +public class AircompressorCodec + implements Codec +{ + // Airlift Codecs are assumed to not retain memory and are assumed to not be pooled + private final CompressionCodec codec; + + public AircompressorCodec(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + } + + @Override + public OutputStream createStreamCompressor(OutputStream outputStream) + throws IOException + { + return codec.createOutputStream(outputStream); + } + + @Override + public ValueCompressor createValueCompressor() + { + return new AircompressorValueCompressor(codec); + } + + private static class AircompressorValueCompressor + implements ValueCompressor + { + private final CompressionCodec codec; + private final DynamicSliceOutput buffer; + + private AircompressorValueCompressor(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + this.buffer = new DynamicSliceOutput(1024); + } + + @Override + public Slice compress(Slice slice) + throws IOException + { + buffer.reset(); + try (CompressionOutputStream compressionStream = codec.createOutputStream(buffer, codec.createCompressor())) { + slice.getInput().transferTo(compressionStream); + } + return buffer.slice(); + } + } + + @Override + public MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize) + { + return new AircompressorCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); + } + + // this can be 
dramatically simplified when actual hadoop codecs are dropped + private static class AircompressorCompressedSliceOutputSupplier + implements Supplier + { + private final CompressionCodec codec; + private final ChunkedSliceOutput compressedOutput; + + public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) + { + this.codec = requireNonNull(codec, "codec is null"); + this.compressedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); + } + + @Override + public MemoryCompressedSliceOutput get() + { + try { + compressedOutput.reset(); + CompressionOutputStream compressionStream = codec.createOutputStream(compressedOutput); + return new MemoryCompressedSliceOutput(compressionStream, compressedOutput, this, () -> {}); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + + @Override + public InputStream createStreamDecompressor(InputStream inputStream) + throws IOException + { + return codec.createInputStream(inputStream); + } + + @Override + public ValueDecompressor createValueDecompressor() + { + return new AircompressorValueDecompressor(codec); + } + + private static class AircompressorValueDecompressor + implements ValueDecompressor + { + private final CompressionCodec codec; + + private AircompressorValueDecompressor(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + } + + @Override + public void decompress(Slice compressed, Slice uncompressed) + throws IOException + { + try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput())) { + uncompressed.setBytes(0, decompressorStream, uncompressed.length()); + } + catch (IndexOutOfBoundsException | IOException e) { + throw new IOException("Compressed stream is truncated", e); + } + } + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java deleted file mode 100644 index f5e1960e8848..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCodecFactory.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hive.formats.compression; - -import io.airlift.compress.gzip.JdkGzipCodec; -import io.airlift.compress.lz4.Lz4Codec; -import io.airlift.compress.lzo.LzoCodec; -import io.airlift.compress.snappy.SnappyCodec; - -import static java.util.Objects.requireNonNull; - -public class AircompressorCodecFactory - implements CodecFactory -{ - private static final String SNAPPY_CODEC_NAME = "org.apache.hadoop.io.compress.SnappyCodec"; - private static final String LZO_CODEC_NAME = "com.hadoop.compression.lzo.LzoCodec"; - private static final String LZO_CODEC_NAME_DEPRECATED = "org.apache.hadoop.io.compress.LzoCodec"; - private static final String LZ4_CODEC_NAME = "org.apache.hadoop.io.compress.Lz4Codec"; - private static final String GZIP_CODEC_NAME = "org.apache.hadoop.io.compress.GzipCodec"; - - private final CodecFactory delegate; - - public AircompressorCodecFactory(CodecFactory delegate) - { - this.delegate = requireNonNull(delegate, "delegate is null"); - } - - @Override - public Compressor createCompressor(String codecName) - { - return switch (codecName) { - case SNAPPY_CODEC_NAME -> new 
AircompressorCompressor(new SnappyCodec()); - case LZO_CODEC_NAME, LZO_CODEC_NAME_DEPRECATED -> new AircompressorCompressor(new LzoCodec()); - case LZ4_CODEC_NAME -> new AircompressorCompressor(new Lz4Codec()); - case GZIP_CODEC_NAME -> new AircompressorCompressor(new JdkGzipCodec()); - default -> delegate.createCompressor(codecName); - }; - } - - @Override - public Decompressor createDecompressor(String codecName) - { - return switch (codecName) { - case SNAPPY_CODEC_NAME -> new AircompressorDecompressor(new SnappyCodec()); - case LZO_CODEC_NAME, LZO_CODEC_NAME_DEPRECATED -> new AircompressorDecompressor(new LzoCodec()); - case LZ4_CODEC_NAME -> new AircompressorDecompressor(new Lz4Codec()); - case GZIP_CODEC_NAME -> new AircompressorDecompressor(new JdkGzipCodec()); - default -> delegate.createDecompressor(codecName); - }; - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java deleted file mode 100644 index af24253aa354..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorCompressor.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.hive.formats.compression; - -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionOutputStream; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.function.Supplier; - -import static java.util.Objects.requireNonNull; - -public class AircompressorCompressor - implements Compressor -{ - private final CompressionCodec codec; - - public AircompressorCompressor(CompressionCodec codec) - { - this.codec = requireNonNull(codec, "codec is null"); - } - - @Override - public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) - { - return new AircompressorCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); - } - - private static class AircompressorCompressedSliceOutputSupplier - implements Supplier - { - private final CompressionCodec codec; - private final org.apache.hadoop.io.compress.Compressor compressor; - private final ChunkedSliceOutput compressedOutput; - - public AircompressorCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) - { - this.codec = requireNonNull(codec, "codec is null"); - this.compressor = codec.createCompressor(); - this.compressedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); - } - - @Override - public MemoryCompressedSliceOutput get() - { - try { - compressor.reset(); - compressedOutput.reset(); - CompressionOutputStream compressionStream = codec.createOutputStream(compressedOutput, compressor); - return new MemoryCompressedSliceOutput(compressionStream, compressedOutput, this, () -> {}); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java deleted file mode 100644 index 
100ea9a76edb..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/AircompressorDecompressor.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hive.formats.compression; - -import io.airlift.slice.Slice; -import io.trino.hive.formats.rcfile.RcFileCorruptionException; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionInputStream; - -import java.io.IOException; - -import static java.util.Objects.requireNonNull; - -public class AircompressorDecompressor - implements Decompressor -{ - private final CompressionCodec codec; - - public AircompressorDecompressor(CompressionCodec codec) - { - this.codec = requireNonNull(codec, "codec is null"); - } - - @Override - public void decompress(Slice compressed, Slice uncompressed) - throws RcFileCorruptionException - { - try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput())) { - uncompressed.setBytes(0, decompressorStream, uncompressed.length()); - } - catch (IndexOutOfBoundsException | IOException e) { - throw new RcFileCorruptionException(e, "Compressed stream is truncated"); - } - } - - @Override - public void destroy() {} -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java similarity index 53% rename 
from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java index 70f742b18117..9b55665110f9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Compressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Codec.java @@ -13,7 +13,22 @@ */ package io.trino.hive.formats.compression; -public interface Compressor +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public interface Codec { - MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize); + OutputStream createStreamCompressor(OutputStream outputStream) + throws IOException; + + ValueCompressor createValueCompressor(); + + MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize) + throws IOException; + + InputStream createStreamDecompressor(InputStream inputStream) + throws IOException; + + ValueDecompressor createValueDecompressor(); } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java new file mode 100644 index 000000000000..f126e2e5edb7 --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CompressionKind.java @@ -0,0 +1,137 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats.compression; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.compress.gzip.JdkGzipCodec; +import io.airlift.compress.lz4.Lz4Codec; +import io.airlift.compress.lzo.LzoCodec; +import io.airlift.compress.snappy.SnappyCodec; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; +import static java.util.Objects.requireNonNull; + +public enum CompressionKind +{ + SNAPPY(".snappy", "org.apache.hadoop.io.compress.SnappyCodec") { + @Override + public Codec createCodec() + { + return new AircompressorCodec(new SnappyCodec()); + } + }, + LZO(".lzo_deflate", "org.apache.hadoop.io.compress.LzoCodec", "com.hadoop.compression.lzo.LzoCodec") { + @Override + public Codec createCodec() + { + return new AircompressorCodec(new LzoCodec()); + } + }, + LZ4(".lz4", "org.apache.hadoop.io.compress.Lz4Codec") { + @Override + public Codec createCodec() + { + return new AircompressorCodec(new Lz4Codec()); + } + }, + GZIP(".gz", "org.apache.hadoop.io.compress.GzipCodec") { + @Override + public Codec createCodec() + { + return new AircompressorCodec(new JdkGzipCodec()); + } + }, + ZSTD(".zst", "org.apache.hadoop.io.compress.ZStandardCodec") { + @Override + public Codec createCodec() + { + org.apache.hadoop.io.compress.ZStandardCodec codec = new org.apache.hadoop.io.compress.ZStandardCodec(); + codec.setConf(newEmptyConfiguration()); + return new HadoopCodec(codec); + } + }, + BZIP2(".bz2", "org.apache.hadoop.io.compress.BZip2Codec") { + @Override + public Codec createCodec() + { + org.apache.hadoop.io.compress.BZip2Codec codec = new 
org.apache.hadoop.io.compress.BZip2Codec(); + codec.setConf(newEmptyConfiguration()); + return new HadoopCodec(codec); + } + }; + + private final List hadoopClassNames; + private final String fileExtension; + + CompressionKind(String fileExtension, String... hadoopClassNames) + { + this.hadoopClassNames = ImmutableList.copyOf(hadoopClassNames); + this.fileExtension = requireNonNull(fileExtension, "fileExtension is null"); + } + + public String getHadoopClassName() + { + return hadoopClassNames.get(0); + } + + public String getFileExtension() + { + return fileExtension; + } + + public abstract Codec createCodec(); + + private static final Map CODECS_BY_HADOOP_CLASS_NAME; + + static { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (CompressionKind codec : values()) { + for (String hadoopClassNames : codec.hadoopClassNames) { + builder.put(hadoopClassNames, codec); + } + } + CODECS_BY_HADOOP_CLASS_NAME = builder.buildOrThrow(); + } + + public static CompressionKind fromHadoopClassName(String hadoopClassName) + { + return Optional.ofNullable(CODECS_BY_HADOOP_CLASS_NAME.get(hadoopClassName)) + .orElseThrow(() -> new IllegalArgumentException("Unknown codec: " + hadoopClassName)); + } + + public static Codec createCodecFromHadoopClassName(String hadoopClassName) + { + return Optional.ofNullable(CODECS_BY_HADOOP_CLASS_NAME.get(hadoopClassName)) + .orElseThrow(() -> new IllegalArgumentException("Unknown codec: " + hadoopClassName)) + .createCodec(); + } + + private static final Map CODECS_BY_FILE_EXTENSION = Arrays.stream(values()) + .filter(codec -> codec.fileExtension != null) + .collect(toImmutableMap(codec -> codec.fileExtension, Function.identity())); + + public static Optional createCodecFromExtension(String extension) + { + return Optional.ofNullable(CODECS_BY_FILE_EXTENSION.get(extension)) + .map(CompressionKind::createCodec); + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java new file mode 100644 index 000000000000..91afbae054eb --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodec.java @@ -0,0 +1,175 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats.compression; + +import io.airlift.slice.DynamicSliceOutput; +import io.airlift.slice.Slice; +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.util.function.Supplier; + +import static com.google.common.base.Preconditions.checkState; +import static java.util.Objects.requireNonNull; + +public class HadoopCodec + implements Codec +{ + private final CompressionCodec codec; + + public HadoopCodec(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + } + + @Override + public OutputStream createStreamCompressor(OutputStream outputStream) + throws IOException + { + return codec.createOutputStream(outputStream); + } + + @Override + public ValueCompressor 
createValueCompressor() + { + return new HadoopValueCompressor(codec); + } + + private static class HadoopValueCompressor + implements ValueCompressor + { + private final CompressionCodec codec; + private final Compressor compressor; + private final DynamicSliceOutput buffer; + + private HadoopValueCompressor(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + this.compressor = CodecPool.getCompressor(requireNonNull(codec, "codec is null")); + this.buffer = new DynamicSliceOutput(1024); + } + + @Override + public Slice compress(Slice slice) + throws IOException + { + compressor.reset(); + buffer.reset(); + try (CompressionOutputStream compressionStream = codec.createOutputStream(buffer, compressor)) { + slice.getInput().transferTo(compressionStream); + } + return buffer.slice(); + } + + @Override + public void close() + { + CodecPool.returnCompressor(compressor); + } + } + + @Override + public MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(int minChunkSize, int maxChunkSize) + { + return new HadoopCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); + } + + private static class HadoopCompressedSliceOutputSupplier + implements Supplier + { + private final CompressionCodec codec; + private final Compressor compressor; + private final ChunkedSliceOutput bufferedOutput; + + public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) + { + this.codec = requireNonNull(codec, "codec is null"); + this.compressor = CodecPool.getCompressor(requireNonNull(codec, "codec is null")); + this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); + } + + @Override + public MemoryCompressedSliceOutput get() + { + try { + compressor.reset(); + bufferedOutput.reset(); + CompressionOutputStream compressionStream = codec.createOutputStream(bufferedOutput, compressor); + return new MemoryCompressedSliceOutput(compressionStream, bufferedOutput, this, () -> 
CodecPool.returnCompressor(compressor)); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + + @Override + public InputStream createStreamDecompressor(InputStream inputStream) + throws IOException + { + return codec.createInputStream(inputStream); + } + + @Override + public ValueDecompressor createValueDecompressor() + { + return new HadoopValueDecompressor(codec); + } + + private static class HadoopValueDecompressor + implements ValueDecompressor + { + private final CompressionCodec codec; + private final Decompressor decompressor; + private boolean closed; + + private HadoopValueDecompressor(CompressionCodec codec) + { + this.codec = requireNonNull(codec, "codec is null"); + decompressor = CodecPool.getDecompressor(codec); + } + + @Override + public void decompress(Slice compressed, Slice uncompressed) + throws IOException + { + checkState(!closed, "Value decompressor has been closed"); + decompressor.reset(); + try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput(), decompressor)) { + uncompressed.setBytes(0, decompressorStream, uncompressed.length()); + } + catch (IndexOutOfBoundsException | IOException e) { + throw new IOException("Compressed stream is truncated", e); + } + } + + @Override + public void close() + { + if (closed) { + return; + } + closed = true; + CodecPool.returnDecompressor(decompressor); + } + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java deleted file mode 100644 index dfeeb10f20aa..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCodecFactory.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hive.formats.compression; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.io.compress.CompressionCodec; - -import java.lang.reflect.Constructor; - -import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; - -public class HadoopCodecFactory - implements CodecFactory -{ - private final ClassLoader classLoader; - - public HadoopCodecFactory(ClassLoader classLoader) - { - this.classLoader = classLoader; - } - - @Override - public Compressor createCompressor(String codecName) - { - CompressionCodec codec = createCompressionCodec(codecName); - return new HadoopCompressor(codec); - } - - @Override - public Decompressor createDecompressor(String codecName) - { - CompressionCodec codec = createCompressionCodec(codecName); - return new HadoopDecompressor(codec); - } - - private CompressionCodec createCompressionCodec(String codecName) - { - try { - Class codecClass = classLoader.loadClass(codecName).asSubclass(CompressionCodec.class); - Constructor constructor = codecClass.getDeclaredConstructor(); - constructor.setAccessible(true); - CompressionCodec codec = constructor.newInstance(); - if (codec instanceof Configurable) { - // Hadoop is crazy... 
you have to give codecs an empty configuration, or they throw NPEs, - // but you need to make sure the configuration doesn't "load" defaults, or it spends - // forever loading XML with no useful information - ((Configurable) codec).setConf(newEmptyConfiguration()); - } - return codec; - } - catch (ReflectiveOperationException e) { - throw new IllegalArgumentException("Unknown codec: " + codecName, e); - } - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java deleted file mode 100644 index d3f1352783af..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopCompressor.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.hive.formats.compression; - -import org.apache.hadoop.io.compress.CodecPool; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionOutputStream; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.function.Supplier; - -import static java.util.Objects.requireNonNull; - -public class HadoopCompressor - implements Compressor -{ - private final CompressionCodec codec; - - public HadoopCompressor(CompressionCodec codec) - { - this.codec = requireNonNull(codec, "codec is null"); - } - - @Override - public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) - { - return new HadoopCompressedSliceOutputSupplier(codec, minChunkSize, maxChunkSize).get(); - } - - private static class HadoopCompressedSliceOutputSupplier - implements Supplier - { - private final CompressionCodec codec; - private final org.apache.hadoop.io.compress.Compressor compressor; - private final ChunkedSliceOutput bufferedOutput; - - public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize) - { - this.codec = requireNonNull(codec, "codec is null"); - this.compressor = CodecPool.getCompressor(requireNonNull(codec, "codec is null")); - this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); - } - - @Override - public MemoryCompressedSliceOutput get() - { - try { - compressor.reset(); - bufferedOutput.reset(); - CompressionOutputStream compressionStream = codec.createOutputStream(bufferedOutput, compressor); - return new MemoryCompressedSliceOutput(compressionStream, bufferedOutput, this, () -> CodecPool.returnCompressor(compressor)); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java deleted file mode 100644 index 408e3639317e..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/HadoopDecompressor.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hive.formats.compression; - -import io.airlift.slice.Slice; -import io.trino.hive.formats.rcfile.RcFileCorruptionException; -import org.apache.hadoop.io.compress.CodecPool; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionInputStream; - -import java.io.IOException; - -import static com.google.common.base.Preconditions.checkState; -import static java.util.Objects.requireNonNull; - -public class HadoopDecompressor - implements Decompressor -{ - private final CompressionCodec codec; - private final org.apache.hadoop.io.compress.Decompressor decompressor; - private boolean destroyed; - - public HadoopDecompressor(CompressionCodec codec) - { - this.codec = requireNonNull(codec, "codec is null"); - decompressor = CodecPool.getDecompressor(codec); - } - - @Override - public void decompress(Slice compressed, Slice uncompressed) - throws RcFileCorruptionException - { - checkState(!destroyed, "Codec has been destroyed"); - decompressor.reset(); - try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput(), decompressor)) { - 
uncompressed.setBytes(0, decompressorStream, uncompressed.length()); - } - catch (IndexOutOfBoundsException | IOException e) { - throw new RcFileCorruptionException(e, "Compressed stream is truncated"); - } - } - - @Override - public void destroy() - { - if (destroyed) { - return; - } - destroyed = true; - CodecPool.returnDecompressor(decompressor); - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java index a7975d4b729a..04b5a636084f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/MemoryCompressedSliceOutput.java @@ -105,4 +105,27 @@ public void destroy() } } } + + public static MemoryCompressedSliceOutput createUncompressedMemorySliceOutput(int minChunkSize, int maxChunkSize) + { + return new UncompressedSliceOutputSupplier(minChunkSize, maxChunkSize).get(); + } + + private static class UncompressedSliceOutputSupplier + implements Supplier + { + private final ChunkedSliceOutput chunkedSliceOutput; + + private UncompressedSliceOutputSupplier(int minChunkSize, int maxChunkSize) + { + chunkedSliceOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); + } + + @Override + public MemoryCompressedSliceOutput get() + { + chunkedSliceOutput.reset(); + return new MemoryCompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {}); + } + } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java deleted file mode 100644 index efc5450b01c5..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/NoneCompressor.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed 
under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hive.formats.compression; - -import java.util.function.Supplier; - -public class NoneCompressor - implements Compressor -{ - @Override - public MemoryCompressedSliceOutput createCompressedSliceOutput(int minChunkSize, int maxChunkSize) - { - return new NoneCompressedSliceOutputSupplier(minChunkSize, maxChunkSize).get(); - } - - private static class NoneCompressedSliceOutputSupplier - implements Supplier - { - private final ChunkedSliceOutput chunkedSliceOutput; - - private NoneCompressedSliceOutputSupplier(int minChunkSize, int maxChunkSize) - { - chunkedSliceOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize); - } - - @Override - public MemoryCompressedSliceOutput get() - { - chunkedSliceOutput.reset(); - return new MemoryCompressedSliceOutput(chunkedSliceOutput, chunkedSliceOutput, this, () -> {}); - } - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java similarity index 70% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java index 86121fa559a7..0dd99b5b677b 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/CodecFactory.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueCompressor.java @@ -13,9 +13,17 @@ */ package io.trino.hive.formats.compression; -public interface CodecFactory +import io.airlift.slice.Slice; + +import java.io.Closeable; +import java.io.IOException; + +public interface ValueCompressor + extends Closeable { - Compressor createCompressor(String codecName); + Slice compress(Slice slice) + throws IOException; - Decompressor createDecompressor(String codecName); + @Override + default void close() {} } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java similarity index 78% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java index a0c2751ae0ce..3eaa64836a2e 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/Decompressor.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/compression/ValueDecompressor.java @@ -14,12 +14,16 @@ package io.trino.hive.formats.compression; import io.airlift.slice.Slice; -import io.trino.hive.formats.rcfile.RcFileCorruptionException; -public interface Decompressor +import java.io.Closeable; +import java.io.IOException; + +public interface ValueDecompressor + extends Closeable { void decompress(Slice compressed, Slice uncompressed) - throws RcFileCorruptionException; + throws IOException; - void destroy(); + @Override + default void close() {} } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 397458e8f6e8..198046ac6c86 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -23,8 +23,8 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.DataSize.Unit; -import io.trino.hive.formats.compression.CodecFactory; -import io.trino.hive.formats.compression.Decompressor; +import io.trino.hive.formats.compression.CompressionKind; +import io.trino.hive.formats.compression.ValueDecompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum; import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksumBuilder; import io.trino.spi.Page; @@ -78,7 +78,7 @@ public class RcFileReader private final byte version; - private final Decompressor decompressor; + private final ValueDecompressor decompressor; private final Map metadata; private final int columnCount; @@ -108,20 +108,18 @@ public RcFileReader( RcFileDataSource dataSource, RcFileEncoding encoding, Map readColumns, - CodecFactory codecFactory, long offset, long length, DataSize bufferSize) throws IOException { - this(dataSource, encoding, readColumns, codecFactory, offset, length, bufferSize, Optional.empty()); + this(dataSource, encoding, readColumns, offset, length, bufferSize, Optional.empty()); } private RcFileReader( RcFileDataSource dataSource, RcFileEncoding encoding, Map readColumns, - CodecFactory codecFactory, long offset, long length, DataSize bufferSize, @@ -179,7 +177,7 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { if (compressed) { String codecClassName = readLengthPrefixedString(input).toStringUtf8(); validateWrite(validation -> validation.getCodecClassName().equals(Optional.of(codecClassName)), "Unexpected compression codec"); - this.decompressor = codecFactory.createDecompressor(codecClassName); + this.decompressor = CompressionKind.createCodecFromHadoopClassName(codecClassName).createValueDecompressor(); } else { validateWrite(validation -> validation.getCodecClassName().equals(Optional.empty()), "Expected file to be 
compressed"); @@ -290,7 +288,7 @@ public void close() } finally { if (decompressor != null) { - decompressor.destroy(); + decompressor.close(); } } if (writeChecksumBuilder.isPresent()) { @@ -512,8 +510,7 @@ static void validateFile( RcFileWriteValidation writeValidation, RcFileDataSource input, RcFileEncoding encoding, - List types, - CodecFactory codecFactory) + List types) throws RcFileCorruptionException { ImmutableMap.Builder readTypes = ImmutableMap.builder(); @@ -524,7 +521,6 @@ static void validateFile( input, encoding, readTypes.buildOrThrow(), - codecFactory, 0, input.getSize(), DataSize.of(8, Unit.MEGABYTE), @@ -544,7 +540,7 @@ static void validateFile( private static class Column { private final ColumnEncoding encoding; - private final Decompressor decompressor; + private final ValueDecompressor decompressor; private BasicSliceInput lengthsInput; private Slice dataBuffer; @@ -560,7 +556,7 @@ private static class Column private int runLength; private int lastValueLength = -1; - public Column(ColumnEncoding encoding, Decompressor decompressor) + public Column(ColumnEncoding encoding, ValueDecompressor decompressor) { this.encoding = encoding; this.decompressor = decompressor; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index 18c185cffe7a..fd37bbf31679 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -18,10 +18,9 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; -import io.trino.hive.formats.compression.CodecFactory; -import io.trino.hive.formats.compression.Compressor; +import io.trino.hive.formats.compression.Codec; +import io.trino.hive.formats.compression.CompressionKind; import 
io.trino.hive.formats.compression.MemoryCompressedSliceOutput; -import io.trino.hive.formats.compression.NoneCompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.RcFileWriteValidationBuilder; import io.trino.spi.Page; import io.trino.spi.block.Block; @@ -73,7 +72,6 @@ public class RcFileWriter private final SliceOutput output; private final List types; private final RcFileEncoding encoding; - private final CodecFactory codecFactory; private final long syncFirst = ThreadLocalRandom.current().nextLong(); private final long syncSecond = ThreadLocalRandom.current().nextLong(); @@ -96,8 +94,7 @@ public RcFileWriter( SliceOutput output, List types, RcFileEncoding encoding, - Optional codecName, - CodecFactory codecFactory, + Optional compressionKind, Map metadata, boolean validate) throws IOException @@ -106,8 +103,7 @@ public RcFileWriter( output, types, encoding, - codecName, - codecFactory, + compressionKind, metadata, DEFAULT_TARGET_MIN_ROW_GROUP_SIZE, DEFAULT_TARGET_MAX_ROW_GROUP_SIZE, @@ -118,8 +114,7 @@ public RcFileWriter( SliceOutput output, List types, RcFileEncoding encoding, - Optional codecName, - CodecFactory codecFactory, + Optional compressionKind, Map metadata, DataSize targetMinRowGroupSize, DataSize targetMaxRowGroupSize, @@ -130,8 +125,7 @@ public RcFileWriter( requireNonNull(types, "types is null"); checkArgument(!types.isEmpty(), "types is empty"); requireNonNull(encoding, "encoding is null"); - requireNonNull(codecName, "codecName is null"); - requireNonNull(codecFactory, "codecFactory is null"); + requireNonNull(compressionKind, "compressionKind is null"); requireNonNull(metadata, "metadata is null"); checkArgument(!metadata.containsKey(PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY), "Cannot set property %s", PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY); checkArgument(!metadata.containsKey(COLUMN_COUNT_METADATA_KEY), "Cannot set property %s", COLUMN_COUNT_METADATA_KEY); @@ -144,7 +138,6 @@ public RcFileWriter( this.output = output; 
this.types = types; this.encoding = encoding; - this.codecFactory = codecFactory; // write header output.writeBytes(RCFILE_MAGIC); @@ -152,9 +145,9 @@ public RcFileWriter( recordValidation(validation -> validation.setVersion((byte) CURRENT_VERSION)); // write codec information - output.writeBoolean(codecName.isPresent()); - codecName.ifPresent(name -> writeLengthPrefixedString(output, utf8Slice(name))); - recordValidation(validation -> validation.setCodecClassName(codecName)); + output.writeBoolean(compressionKind.isPresent()); + compressionKind.map(CompressionKind::getHadoopClassName).ifPresent(name -> writeLengthPrefixedString(output, utf8Slice(name))); + recordValidation(validation -> validation.setCodecClassName(compressionKind.map(CompressionKind::getHadoopClassName))); // write metadata output.writeInt(Integer.reverseBytes(metadata.size() + 2)); @@ -171,14 +164,14 @@ public RcFileWriter( recordValidation(validation -> validation.setSyncSecond(syncSecond)); // initialize columns - Compressor compressor = codecName.map(codecFactory::createCompressor).orElse(new NoneCompressor()); - keySectionOutput = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); + Optional codec = compressionKind.map(CompressionKind::createCodec); + keySectionOutput = createMemoryCompressedSliceOutput(codec); keySectionOutput.close(); // output is recycled on first use which requires the output to be closed columnEncoders = new ColumnEncoder[types.size()]; for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) { Type type = types.get(columnIndex); ColumnEncoding columnEncoding = encoding.getEncoding(type); - columnEncoders[columnIndex] = new ColumnEncoder(columnEncoding, compressor); + columnEncoders[columnIndex] = new ColumnEncoder(columnEncoding, codec); } this.targetMinRowGroupSize = toIntExact(targetMinRowGroupSize.toBytes()); this.targetMaxRowGroupSize = toIntExact(targetMaxRowGroupSize.toBytes()); @@ -220,8 +213,7 
@@ public void validate(RcFileDataSource input) validationBuilder.build(), input, encoding, - types, - codecFactory); + types); } public long getRetainedSizeInBytes() @@ -329,6 +321,15 @@ private void writeRowGroup() bufferedRows = 0; } + private static MemoryCompressedSliceOutput createMemoryCompressedSliceOutput(Optional codec) + throws IOException + { + if (codec.isPresent()) { + return codec.get().createMemoryCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); + } + return MemoryCompressedSliceOutput.createUncompressedMemorySliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); + } + private static class ColumnEncoder { private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(ColumnEncoder.class).instanceSize() + ClassLayout.parseClass(ColumnEncodeOutput.class).instanceSize()); @@ -343,10 +344,11 @@ private static class ColumnEncoder private boolean columnClosed; - public ColumnEncoder(ColumnEncoding columnEncoding, Compressor compressor) + public ColumnEncoder(ColumnEncoding columnEncoding, Optional codec) + throws IOException { this.columnEncoding = columnEncoding; - this.output = compressor.createCompressedSliceOutput((int) MIN_BUFFER_SIZE.toBytes(), (int) MAX_BUFFER_SIZE.toBytes()); + this.output = createMemoryCompressedSliceOutput(codec); this.encodeOutput = new ColumnEncodeOutput(lengthOutput, output); } diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java index 028630c7233c..12481ba5dd48 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java @@ -24,8 +24,7 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.trino.hadoop.HadoopNative; -import 
io.trino.hive.formats.compression.AircompressorCodecFactory; -import io.trino.hive.formats.compression.HadoopCodecFactory; +import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.hive.formats.rcfile.text.TextRcFileEncoding; import io.trino.spi.Page; @@ -81,10 +80,6 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.compress.BZip2Codec; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; @@ -125,10 +120,10 @@ import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.findFirstSyncPosition; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.BZIP2; +import static io.trino.hive.formats.rcfile.RcFileTester.Compression.GZIP; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.LZ4; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.NONE; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.SNAPPY; -import static io.trino.hive.formats.rcfile.RcFileTester.Compression.ZLIB; import static io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION; import static io.trino.hive.formats.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY; import static io.trino.spi.type.BigintType.BIGINT; @@ -233,43 +228,24 @@ public RcFileEncoding getVectorEncoding() public enum Compression { - BZIP2 { - @Override - Optional getCodecName() - { - return Optional.of(BZip2Codec.class.getName()); - } - }, - ZLIB { - @Override - Optional getCodecName() - { - return Optional.of(GzipCodec.class.getName()); - } - }, - SNAPPY { - @Override - 
Optional getCodecName() - { - return Optional.of(SnappyCodec.class.getName()); - } - }, - LZ4 { - @Override - Optional getCodecName() - { - return Optional.of(Lz4Codec.class.getName()); - } - }, - NONE { - @Override - Optional getCodecName() - { - return Optional.empty(); - } - }; + SNAPPY(CompressionKind.SNAPPY), + LZ4(CompressionKind.LZ4), + GZIP(CompressionKind.GZIP), + ZSTD(CompressionKind.ZSTD), + BZIP2(CompressionKind.BZIP2), + NONE(null); - abstract Optional getCodecName(); + private final Optional compressionKind; + + Compression(CompressionKind compressionKind) + { + this.compressionKind = Optional.ofNullable(compressionKind); + } + + public Optional getCompressionKind() + { + return compressionKind; + } } private boolean structTestsEnabled; @@ -305,7 +281,7 @@ public static RcFileTester fullTestRcFileReader() // These compression algorithms were chosen to cover the three different // cases: uncompressed, aircompressor, and hadoop compression // We assume that the compression algorithms generally work - rcFileTester.compressions = ImmutableSet.of(NONE, LZ4, ZLIB, BZIP2); + rcFileTester.compressions = ImmutableSet.of(NONE, LZ4, GZIP, BZIP2); return rcFileTester; } @@ -619,7 +595,6 @@ private static RcFileReader createRcFileReader(TempFile tempFile, Type type, RcF rcFileDataSource, encoding, ImmutableMap.of(0, type), - new AircompressorCodecFactory(new HadoopCodecFactory(RcFileTester.class.getClassLoader())), 0, tempFile.getFile().length(), DataSize.of(8, MEGABYTE)); @@ -633,13 +608,11 @@ private static DataSize writeRcFileColumnNew(File outputFile, Format format, Com throws Exception { OutputStreamSliceOutput output = new OutputStreamSliceOutput(new FileOutputStream(outputFile)); - AircompressorCodecFactory codecFactory = new AircompressorCodecFactory(new HadoopCodecFactory(RcFileTester.class.getClassLoader())); RcFileWriter writer = new RcFileWriter( output, ImmutableList.of(type), format.getVectorEncoding(), - compression.getCodecName(), - codecFactory, + 
compression.getCompressionKind(), metadata, DataSize.of(100, KILOBYTE), // use a smaller size to create more row groups DataSize.of(200, KILOBYTE), @@ -1061,7 +1034,7 @@ private static RecordWriter createRcFileWriterOld(File outputFile, Compression c throws IOException { JobConf jobConf = new JobConf(false); - Optional codecName = compression.getCodecName(); + Optional codecName = compression.getCompressionKind().map(CompressionKind::getHadoopClassName); codecName.ifPresent(s -> jobConf.set(COMPRESS_CODEC, s)); return new RCFileOutputFormat().getHiveRecordWriter( diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java index 567761a03679..2cc1bc53cbe7 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java @@ -19,9 +19,6 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; -import io.trino.hive.formats.compression.CodecFactory; -import io.trino.hive.formats.compression.Compressor; -import io.trino.hive.formats.compression.Decompressor; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.spi.block.Block; import org.joda.time.DateTimeZone; @@ -242,7 +239,6 @@ private static List readValues(Slice data, int offset, int length) new MemoryRcFileDataSource(new RcFileDataSourceId("test"), data), new BinaryRcFileEncoding(DateTimeZone.UTC), ImmutableMap.of(0, SMALLINT), - new BogusRcFileCodecFactory(), offset, length, DataSize.of(8, MEGABYTE)); @@ -293,20 +289,4 @@ public List getRowGroupSegmentOffsets() return rowGroupSegmentOffsets; } } - - private static class BogusRcFileCodecFactory - implements CodecFactory - { - @Override - public Compressor createCompressor(String codecName) - { - throw new 
UnsupportedOperationException(); - } - - @Override - public Decompressor createDecompressor(String codecName) - { - throw new UnsupportedOperationException(); - } - } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java index 7a9aab74fd9a..eeda7a40a137 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveCompressionCodec.java @@ -15,11 +15,6 @@ import io.trino.orc.metadata.CompressionKind; import org.apache.avro.file.DataFileConstants; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; -import org.apache.hadoop.io.compress.ZStandardCodec; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import java.util.Optional; @@ -29,34 +24,34 @@ public enum HiveCompressionCodec { NONE(null, CompressionKind.NONE, CompressionCodecName.UNCOMPRESSED, DataFileConstants.NULL_CODEC), - SNAPPY(SnappyCodec.class, CompressionKind.SNAPPY, CompressionCodecName.SNAPPY, DataFileConstants.SNAPPY_CODEC), - LZ4(Lz4Codec.class, CompressionKind.LZ4, CompressionCodecName.LZ4, null), - ZSTD(ZStandardCodec.class, CompressionKind.ZSTD, CompressionCodecName.ZSTD, DataFileConstants.ZSTANDARD_CODEC), + SNAPPY(io.trino.hive.formats.compression.CompressionKind.SNAPPY, CompressionKind.SNAPPY, CompressionCodecName.SNAPPY, DataFileConstants.SNAPPY_CODEC), + LZ4(io.trino.hive.formats.compression.CompressionKind.LZ4, CompressionKind.LZ4, CompressionCodecName.LZ4, null), + ZSTD(io.trino.hive.formats.compression.CompressionKind.ZSTD, CompressionKind.ZSTD, CompressionCodecName.ZSTD, DataFileConstants.ZSTANDARD_CODEC), // Using DEFLATE for GZIP for Avro for now so Avro files can be written in default configuration // 
TODO(https://github.com/trinodb/trino/issues/12580) change GZIP to be unsupported for Avro when we change Trino default compression to be storage format aware - GZIP(GzipCodec.class, CompressionKind.ZLIB, CompressionCodecName.GZIP, DataFileConstants.DEFLATE_CODEC); + GZIP(io.trino.hive.formats.compression.CompressionKind.GZIP, CompressionKind.ZLIB, CompressionCodecName.GZIP, DataFileConstants.DEFLATE_CODEC); - private final Optional> codec; + private final Optional hiveCompressionKind; private final CompressionKind orcCompressionKind; private final CompressionCodecName parquetCompressionCodec; private final Optional avroCompressionCodec; HiveCompressionCodec( - Class codec, + io.trino.hive.formats.compression.CompressionKind hiveCompressionKind, CompressionKind orcCompressionKind, CompressionCodecName parquetCompressionCodec, String avroCompressionCodec) { - this.codec = Optional.ofNullable(codec); + this.hiveCompressionKind = Optional.ofNullable(hiveCompressionKind); this.orcCompressionKind = requireNonNull(orcCompressionKind, "orcCompressionKind is null"); this.parquetCompressionCodec = requireNonNull(parquetCompressionCodec, "parquetCompressionCodec is null"); this.avroCompressionCodec = Optional.ofNullable(avroCompressionCodec); } - public Optional> getCodec() + public Optional getHiveCompressionKind() { - return codec; + return hiveCompressionKind; } public CompressionKind getOrcCompressionKind() diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java index 8b7a99b56285..4571beae0740 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java @@ -16,8 +16,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.CountingOutputStream; import io.airlift.slice.OutputStreamSliceOutput; -import 
io.trino.hive.formats.compression.AircompressorCodecFactory; -import io.trino.hive.formats.compression.HadoopCodecFactory; +import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.rcfile.RcFileDataSource; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileWriter; @@ -67,7 +66,7 @@ public RcFileFileWriter( Closeable rollbackAction, RcFileEncoding rcFileEncoding, List fileColumnTypes, - Optional codecName, + Optional compressionKind, int[] fileInputColumnIndexes, Map metadata, Optional> validationInputFactory) @@ -78,8 +77,7 @@ public RcFileFileWriter( new OutputStreamSliceOutput(this.outputStream), fileColumnTypes, rcFileEncoding, - codecName, - new AircompressorCodecFactory(new HadoopCodecFactory(getClass().getClassLoader())), + compressionKind, metadata, validationInputFactory.isPresent()); this.rollbackAction = requireNonNull(rollbackAction, "rollbackAction is null"); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java index 224f8dc6b56a..aae254beb724 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableMap; import io.trino.hdfs.HdfsEnvironment; +import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.rcfile.RcFileDataSource; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; @@ -119,7 +120,8 @@ else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) { return Optional.empty(); } - Optional codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC)); + Optional compressionKind = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC)) + 
.map(CompressionKind::fromHadoopClassName); // existing tables and partitions may have columns in a different order than the writer is providing, so build // an index to rearrange columns in the proper order @@ -159,7 +161,7 @@ else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) { rollbackAction, rcFileEncoding, fileColumnTypes, - codecName, + compressionKind, fileInputColumnIndexes, ImmutableMap.builder() .put(PRESTO_VERSION_NAME, nodeVersion.toString()) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 8d9370ff17b2..37ed5be77d27 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -20,8 +20,6 @@ import io.airlift.units.DataSize.Unit; import io.trino.hdfs.FSDataInputStreamTail; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hive.formats.compression.AircompressorCodecFactory; -import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.MemoryRcFileDataSource; import io.trino.hive.formats.rcfile.RcFileCorruptionException; import io.trino.hive.formats.rcfile.RcFileDataSource; @@ -205,7 +203,6 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) { dataSource, rcFileEncoding, readColumns.buildOrThrow(), - new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, BUFFER_SIZE); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java index b7ced2c11f5f..525edff6c559 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/CompressionConfigUtil.java @@ -41,9 +41,9 @@ public static void configureCompression(Configuration config, HiveCompressionCod OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text - if (compressionCodec.getCodec().isPresent()) { - config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); - config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); + if (compressionCodec.getHiveCompressionKind().isPresent()) { + config.set("mapred.output.compression.codec", compressionCodec.getHiveCompressionKind().get().getHadoopClassName()); + config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getHiveCompressionKind().get().getHadoopClassName()); } else { config.unset("mapred.output.compression.codec"); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index b12ab6308eca..0c56502b29f4 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -20,6 +20,7 @@ import io.airlift.compress.lzo.LzopCodec; import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.hive.formats.compression.CompressionKind; import io.trino.orc.OrcReaderOptions; import io.trino.orc.OrcWriterOptions; import io.trino.plugin.hive.orc.OrcFileWriterFactory; @@ -1270,15 +1271,8 @@ private void assertRead(Optional pageSourceFactory, Optio assertNotNull(session, "session must be specified"); assertTrue(rowsCount >= 0, "rowsCount must be non-negative"); - String compressionSuffix = compressionCodec.getCodec() - .map(codec -> { - try { - return codec.getConstructor().newInstance().getDefaultExtension(); - } - catch (Exception e) { - 
throw new RuntimeException(e); - } - }) + String compressionSuffix = compressionCodec.getHiveCompressionKind() + .map(CompressionKind::getFileExtension) .orElse(""); File file = File.createTempFile("trino_test", formatName + compressionSuffix); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java index a0203470d3c1..f7bb730dff6b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java @@ -17,8 +17,6 @@ import io.airlift.slice.OutputStreamSliceOutput; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hive.formats.compression.AircompressorCodecFactory; -import io.trino.hive.formats.compression.HadoopCodecFactory; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileWriter; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; @@ -389,8 +387,7 @@ public PrestoRcFileFormatWriter(File targetFile, List types, RcFileEncodin new OutputStreamSliceOutput(new FileOutputStream(targetFile)), types, encoding, - compressionCodec.getCodec().map(Class::getName), - new AircompressorCodecFactory(new HadoopCodecFactory(getClass().getClassLoader())), + compressionCodec.getHiveCompressionKind(), ImmutableMap.of(), true); } From c8ee8b050cea751990c0f60fbdcce5e87da290e2 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sun, 28 Aug 2022 19:26:34 -0700 Subject: [PATCH 09/10] Convert RCFile reader to Trino filesystem interface --- .../trino/filesystem/local/LocalInput.java} | 65 +- .../filesystem/local/LocalInputFile.java | 73 ++ .../trino/filesystem/memory/MemoryInput.java} | 43 +- .../filesystem/memory/MemoryInputFile.java | 75 ++ .../memory/MemorySeekableInputStream.java | 62 ++ 
lib/trino-hive-formats/pom.xml | 10 + .../trino/hive/formats/DataOutputStream.java | 331 ++++++++ .../hive/formats/DataSeekableInputStream.java | 457 +++++++++++ .../hive/formats/rcfile/RcFileDataSource.java | 32 - .../formats/rcfile/RcFileDataSourceId.java | 53 -- .../formats/rcfile/RcFileDecoderUtils.java | 104 ++- .../hive/formats/rcfile/RcFileReader.java | 176 ++--- .../hive/formats/rcfile/RcFileWriter.java | 30 +- .../hive/formats/TestDataOutputStream.java | 234 ++++++ .../formats/TestDataSeekableInputStream.java | 733 ++++++++++++++++++ .../hive/formats/rcfile/RcFileTester.java | 45 +- .../rcfile/TestRcFileReaderManual.java | 8 +- plugin/trino-hive/pom.xml | 5 + .../plugin/hive/MonitoredTrinoInputFile.java | 184 +++++ .../trino/plugin/hive/RcFileFileWriter.java | 24 +- .../plugin/hive/RcFileFileWriterFactory.java | 43 +- .../hive/rcfile/HdfsRcFileDataSource.java | 96 --- .../plugin/hive/rcfile/RcFilePageSource.java | 8 +- .../hive/rcfile/RcFilePageSourceFactory.java | 52 +- .../io/trino/plugin/hive/HiveTestUtils.java | 2 +- .../plugin/hive/TestHiveFileFormats.java | 8 +- .../hive/benchmark/StandardFileFormats.java | 3 +- 27 files changed, 2438 insertions(+), 518 deletions(-) rename lib/{trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java => trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java} (58%) create mode 100644 lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java rename lib/{trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java => trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java} (59%) create mode 100644 lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java create mode 100644 lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java create mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java create mode 
100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java delete mode 100644 lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java create mode 100644 lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java create mode 100644 lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java create mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java delete mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java similarity index 58% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java rename to lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java index 7ed9e7bb5bd8..a6c9f291a518 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/FileRcFileDataSource.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInput.java @@ -11,78 +11,65 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.filesystem.local; + +import io.trino.filesystem.TrinoInput; +import org.apache.iceberg.Files; +import org.apache.iceberg.io.SeekableInputStream; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import static java.lang.Math.min; import static java.util.Objects.requireNonNull; -public class FileRcFileDataSource - implements RcFileDataSource +class LocalInput + implements TrinoInput { - private final File path; - private final long size; + private final File file; private final RandomAccessFile input; - private long readTimeNanos; - private long readBytes; - - public FileRcFileDataSource(File path) - throws IOException - { - this.path = requireNonNull(path, "path is null"); - this.size = path.length(); - this.input = new RandomAccessFile(path, "r"); - } - @Override - public void close() + public LocalInput(File file) throws IOException { - input.close(); - } - - @Override - public long getReadBytes() - { - return readBytes; + this.file = requireNonNull(file, "file is null"); + this.input = new RandomAccessFile(file, "r"); } @Override - public long getReadTimeNanos() + public SeekableInputStream inputStream() { - return readTimeNanos; - } - - @Override - public long getSize() - { - return size; + return Files.localInput(file).newStream(); } @Override public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) throws IOException { - long start = System.nanoTime(); - input.seek(position); input.readFully(buffer, bufferOffset, bufferLength); + } - readTimeNanos += System.nanoTime() - start; - readBytes += bufferLength; + @Override + public int readTail(byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + int readSize = (int) min(file.length(), bufferLength); + readFully(file.length() - readSize, buffer, bufferOffset, readSize); + return readSize; } @Override - public RcFileDataSourceId getId() + public void close() + throws 
IOException { - return new RcFileDataSourceId(path.getPath()); + input.close(); } @Override public String toString() { - return path.getPath(); + return file.getPath(); } } diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java new file mode 100644 index 000000000000..a0475d5fc415 --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/local/LocalInputFile.java @@ -0,0 +1,73 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.local; + +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; + +import java.io.File; +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class LocalInputFile + implements TrinoInputFile +{ + private final File file; + + public LocalInputFile(File file) + { + this.file = requireNonNull(file, "file is null"); + } + + @Override + public TrinoInput newInput() + throws IOException + { + return new LocalInput(file); + } + + @Override + public long length() + throws IOException + { + return file.length(); + } + + @Override + public long modificationTime() + throws IOException + { + return file.lastModified(); + } + + @Override + public boolean exists() + throws IOException + { + return file.exists(); + } + + @Override + public String location() + { + return file.getPath(); + } + + @Override + public String toString() + { + return file.getPath(); + } +} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java similarity index 59% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java rename to lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java index 09023eb1c9e8..85a185ef8d56 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/MemoryRcFileDataSource.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInput.java @@ -11,57 +11,54 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package io.trino.hive.formats.rcfile; +package io.trino.filesystem.memory; import io.airlift.slice.Slice; +import io.trino.filesystem.TrinoInput; +import org.apache.iceberg.io.SeekableInputStream; +import static java.lang.Math.min; import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; -public class MemoryRcFileDataSource - implements RcFileDataSource +class MemoryInput + implements TrinoInput { - private final RcFileDataSourceId id; + private final String location; private final Slice data; - private long readBytes; - public MemoryRcFileDataSource(RcFileDataSourceId id, Slice data) + public MemoryInput(String location, Slice data) { - this.id = requireNonNull(id, "id is null"); + this.location = requireNonNull(location, "location is null"); this.data = requireNonNull(data, "data is null"); } @Override - public RcFileDataSourceId getId() + public SeekableInputStream inputStream() { - return id; + return new MemorySeekableInputStream(data); } @Override - public long getReadBytes() + public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) { - return readBytes; + data.getBytes(toIntExact(position), buffer, bufferOffset, bufferLength); } @Override - public long getReadTimeNanos() + public int readTail(byte[] buffer, int bufferOffset, int bufferLength) { - return 0; + int readSize = min(data.length(), bufferLength); + readFully(data.length() - readSize, buffer, bufferOffset, readSize); + return readSize; } @Override - public long getSize() - { - return data.length(); - } + public void close() {} @Override - public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) + public String toString() { - data.getBytes(toIntExact(position), buffer, bufferOffset, bufferLength); - readBytes += bufferLength; + return location; } - - @Override - public void close() {} } diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java 
b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java new file mode 100644 index 000000000000..81b66ce986e4 --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryInputFile.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.memory; + +import io.airlift.slice.Slice; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class MemoryInputFile + implements TrinoInputFile +{ + private final String location; + private final Slice data; + + public MemoryInputFile(String location, Slice data) + { + this.location = requireNonNull(location, "location is null"); + this.data = requireNonNull(data, "data is null"); + } + + @Override + public TrinoInput newInput() + throws IOException + { + return new MemoryInput(location, data); + } + + @Override + public long length() + throws IOException + { + return data.length(); + } + + @Override + public long modificationTime() + throws IOException + { + return 0; + } + + @Override + public boolean exists() + throws IOException + { + return true; + } + + @Override + public String location() + { + return location; + } + + @Override + public String toString() + { + return location; + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java 
b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java new file mode 100644 index 000000000000..966f0eb176da --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemorySeekableInputStream.java @@ -0,0 +1,62 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.memory; + +import io.airlift.slice.Slice; +import io.airlift.slice.SliceInput; +import org.apache.iceberg.io.SeekableInputStream; + +import java.io.IOException; + +public class MemorySeekableInputStream + extends SeekableInputStream +{ + private final SliceInput input; + + public MemorySeekableInputStream(Slice data) + { + input = data.getInput(); + } + + @Override + public long getPos() + { + return input.position(); + } + + @Override + public void seek(long newPos) + { + input.setPosition(newPos); + } + + @Override + public int read() + throws IOException + { + return input.read(); + } + + @Override + public int read(byte[] destination, int destinationIndex, int length) + { + return input.read(destination, destinationIndex, length); + } + + @Override + public long skip(long length) + { + return input.skip(length); + } +} diff --git a/lib/trino-hive-formats/pom.xml b/lib/trino-hive-formats/pom.xml index 60d21a8bb901..03d915cb137e 100644 --- a/lib/trino-hive-formats/pom.xml +++ b/lib/trino-hive-formats/pom.xml @@ -18,6 +18,11 @@ + + io.trino + trino-filesystem + + io.trino trino-hadoop-toolkit @@ -65,6 
+70,11 @@ joda-time + + org.apache.iceberg + iceberg-api + + org.openjdk.jol jol-core diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java new file mode 100644 index 000000000000..a6aaae2ab218 --- /dev/null +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataOutputStream.java @@ -0,0 +1,331 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.hive.formats; + +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; + +import java.io.Closeable; +import java.io.DataOutput; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; + +import static com.google.common.base.Preconditions.checkArgument; +import static io.airlift.slice.SizeOf.SIZE_OF_BYTE; +import static io.airlift.slice.SizeOf.SIZE_OF_INT; +import static io.airlift.slice.SizeOf.SIZE_OF_LONG; +import static io.airlift.slice.SizeOf.SIZE_OF_SHORT; +import static java.lang.Math.toIntExact; + +public final class DataOutputStream + extends OutputStream + implements DataOutput +{ + private static final int DEFAULT_BUFFER_SIZE = 4 * 1024; + private static final int MINIMUM_CHUNK_SIZE = 1024; + + private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(DataOutputStream.class).instanceSize()); + + private final OutputStream outputStream; + + private final Slice slice; + private final byte[] buffer; + + /** + * Offset of buffer within stream. + */ + private long bufferOffset; + /** + * Current position for writing in buffer. 
+ */ + private int bufferPosition; + + public DataOutputStream(OutputStream inputStream) + { + this(inputStream, DEFAULT_BUFFER_SIZE); + } + + public DataOutputStream(OutputStream outputStream, int bufferSize) + { + checkArgument(bufferSize >= MINIMUM_CHUNK_SIZE, "minimum buffer size of " + MINIMUM_CHUNK_SIZE + " required"); + if (outputStream == null) { + throw new NullPointerException("outputStream is null"); + } + + this.outputStream = outputStream; + this.buffer = new byte[bufferSize]; + this.slice = Slices.wrappedBuffer(buffer); + } + + @Override + public void flush() + throws IOException + { + flushBufferToOutputStream(); + outputStream.flush(); + } + + @Override + public void close() + throws IOException + { + try (Closeable ignored = outputStream) { + flushBufferToOutputStream(); + } + } + + public long longSize() + { + return bufferOffset + bufferPosition; + } + + public long getRetainedSize() + { + return slice.getRetainedSize() + INSTANCE_SIZE; + } + + @Override + public void writeBoolean(boolean value) + throws IOException + { + writeByte(value ? 
1 : 0); + } + + @Override + public void write(int value) + throws IOException + { + writeByte(value); + } + + @Override + public void writeByte(int value) + throws IOException + { + ensureWritableBytes(SIZE_OF_BYTE); + slice.setByte(bufferPosition, value); + bufferPosition += SIZE_OF_BYTE; + } + + @Override + public void writeShort(int value) + throws IOException + { + ensureWritableBytes(SIZE_OF_SHORT); + slice.setShort(bufferPosition, value); + bufferPosition += SIZE_OF_SHORT; + } + + @Override + public void writeInt(int value) + throws IOException + { + ensureWritableBytes(SIZE_OF_INT); + slice.setInt(bufferPosition, value); + bufferPosition += SIZE_OF_INT; + } + + @Override + public void writeLong(long value) + throws IOException + { + ensureWritableBytes(SIZE_OF_LONG); + slice.setLong(bufferPosition, value); + bufferPosition += SIZE_OF_LONG; + } + + @Override + public void writeFloat(float value) + throws IOException + { + writeInt(Float.floatToIntBits(value)); + } + + @Override + public void writeDouble(double value) + throws IOException + { + writeLong(Double.doubleToLongBits(value)); + } + + public void write(Slice source) + throws IOException + { + write(source, 0, source.length()); + } + + public void write(Slice source, int sourceIndex, int length) + throws IOException + { + // Write huge chunks direct to OutputStream + if (length >= MINIMUM_CHUNK_SIZE) { + flushBufferToOutputStream(); + writeToOutputStream(source, sourceIndex, length); + bufferOffset += length; + } + else { + ensureWritableBytes(length); + slice.setBytes(bufferPosition, source, sourceIndex, length); + bufferPosition += length; + } + } + + @Override + public void write(byte[] source) + throws IOException + { + write(source, 0, source.length); + } + + @Override + public void write(byte[] source, int sourceIndex, int length) + throws IOException + { + // Write huge chunks direct to OutputStream + if (length >= MINIMUM_CHUNK_SIZE) { + flushBufferToOutputStream(); + 
writeToOutputStream(source, sourceIndex, length); + bufferOffset += length; + } + else { + ensureWritableBytes(length); + slice.setBytes(bufferPosition, source, sourceIndex, length); + bufferPosition += length; + } + } + + public void write(InputStream in, int length) + throws IOException + { + while (length > 0) { + int batch = ensureBatchSize(length); + slice.setBytes(bufferPosition, in, batch); + bufferPosition += batch; + length -= batch; + } + } + + public void writeZero(int length) + throws IOException + { + checkArgument(length >= 0, "length must be 0 or greater than 0."); + + while (length > 0) { + int batch = ensureBatchSize(length); + Arrays.fill(buffer, bufferPosition, bufferPosition + batch, (byte) 0); + bufferPosition += batch; + length -= batch; + } + } + + @Override + public String toString() + { + StringBuilder builder = new StringBuilder("OutputStreamSliceOutputAdapter{"); + builder.append("outputStream=").append(outputStream); + builder.append("bufferSize=").append(slice.length()); + builder.append('}'); + return builder.toString(); + } + + private void ensureWritableBytes(int minWritableBytes) + throws IOException + { + if (bufferPosition + minWritableBytes > slice.length()) { + flushBufferToOutputStream(); + } + } + + private int ensureBatchSize(int length) + throws IOException + { + ensureWritableBytes(Math.min(MINIMUM_CHUNK_SIZE, length)); + return Math.min(length, slice.length() - bufferPosition); + } + + private void flushBufferToOutputStream() + throws IOException + { + writeToOutputStream(buffer, 0, bufferPosition); + bufferOffset += bufferPosition; + bufferPosition = 0; + } + + private void writeToOutputStream(byte[] source, int sourceIndex, int length) + throws IOException + { + outputStream.write(source, sourceIndex, length); + } + + private void writeToOutputStream(Slice source, int sourceIndex, int length) + throws IOException + { + source.getBytes(sourceIndex, outputStream, length); + } + + // + // Unsupported operations + // + + /** 
+     * Unsupported operation
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    @Deprecated
+    public void writeChar(int value)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Unsupported operation
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    @Deprecated
+    public void writeChars(String s)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Unsupported operation
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    @Deprecated
+    public void writeUTF(String s)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Unsupported operation
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    @Deprecated
+    public void writeBytes(String s)
+    {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java
new file mode 100644
index 000000000000..2ec6d94e0a9f
--- /dev/null
+++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/DataSeekableInputStream.java
@@ -0,0 +1,497 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.hive.formats;
+
+import io.airlift.slice.Slice;
+import io.airlift.slice.Slices;
+import org.apache.iceberg.io.SeekableInputStream;
+import org.openjdk.jol.info.ClassLayout;
+
+import java.io.DataInput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Verify.verify;
+import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
+import static io.airlift.slice.SizeOf.SIZE_OF_INT;
+import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
+import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
+import static io.airlift.slice.SizeOf.sizeOf;
+import static java.lang.Math.toIntExact;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Buffered {@link DataInput} reader over a {@link SeekableInputStream} that tracks the
+ * absolute stream position as well as the bytes read from, and time spent in, the
+ * underlying stream. Multi-byte values are decoded by the backing {@link Slice}.
+ */
+public final class DataSeekableInputStream
+        extends InputStream
+        implements DataInput
+{
+    private static final int INSTANCE_SIZE = toIntExact(ClassLayout.parseClass(DataSeekableInputStream.class).instanceSize());
+    private static final int DEFAULT_BUFFER_SIZE = 4 * 1024;
+    private static final int MINIMUM_CHUNK_SIZE = 1024;
+
+    private final SeekableInputStream inputStream;
+    private long readTimeNanos;
+    private long readBytes;
+
+    private final byte[] buffer;
+    private final Slice slice;
+    /**
+     * Offset of buffer within stream.
+     */
+    private long bufferOffset;
+    /**
+     * Current position for reading from buffer.
+     */
+    private int bufferPosition;
+
+    /**
+     * Number of valid bytes currently held in buffer.
+     */
+    private int bufferFill;
+
+    public DataSeekableInputStream(SeekableInputStream inputStream)
+    {
+        this(inputStream, DEFAULT_BUFFER_SIZE);
+    }
+
+    public DataSeekableInputStream(SeekableInputStream inputStream, int bufferSize)
+    {
+        requireNonNull(inputStream, "inputStream is null");
+        checkArgument(bufferSize >= MINIMUM_CHUNK_SIZE, "minimum buffer size of " + MINIMUM_CHUNK_SIZE + " required");
+
+        this.inputStream = inputStream;
+        this.buffer = new byte[bufferSize];
+        this.slice = Slices.wrappedBuffer(buffer);
+    }
+
+    public long getReadTimeNanos()
+    {
+        return readTimeNanos;
+    }
+
+    public long getReadBytes()
+    {
+        return readBytes;
+    }
+
+    /**
+     * Returns the absolute position in the stream of the next byte to be read.
+     */
+    public long getPos()
+            throws IOException
+    {
+        return bufferOffset + bufferPosition;
+    }
+
+    /**
+     * Discards any buffered data and repositions the underlying stream to {@code newPos}.
+     */
+    public void seek(long newPos)
+            throws IOException
+    {
+        // todo check if new position is within the current buffer
+
+        // drop current buffer
+        bufferPosition = 0;
+        bufferFill = 0;
+
+        // skip the rest in inputStream
+        inputStream.seek(newPos);
+
+        // update buffer offset to the new position
+        bufferOffset = newPos;
+
+        verify(newPos == getPos());
+    }
+
+    /**
+     * Returns the number of buffered bytes, refilling the buffer from the underlying stream
+     * when it is empty; a result of {@code 0} therefore means the stream is exhausted.
+     */
+    @Override
+    public int available()
+            throws IOException
+    {
+        if (bufferPosition < bufferFill) {
+            return availableBytes();
+        }
+
+        return fillBuffer();
+    }
+
+    @Override
+    public int skipBytes(int n)
+            throws IOException
+    {
+        return (int) skip(n);
+    }
+
+    @Override
+    public boolean readBoolean()
+            throws IOException
+    {
+        return readByte() != 0;
+    }
+
+    @Override
+    public byte readByte()
+            throws IOException
+    {
+        ensureAvailable(SIZE_OF_BYTE);
+        byte v = slice.getByte(bufferPosition);
+        bufferPosition += SIZE_OF_BYTE;
+        return v;
+    }
+
+    @Override
+    public int readUnsignedByte()
+            throws IOException
+    {
+        return readByte() & 0xFF;
+    }
+
+    @Override
+    public short readShort()
+            throws IOException
+    {
+        ensureAvailable(SIZE_OF_SHORT);
+        short v = slice.getShort(bufferPosition);
+        bufferPosition += SIZE_OF_SHORT;
+        return v;
+    }
+
+    @Override
+    public int readUnsignedShort()
+            throws IOException
+    {
+        return readShort() & 0xFFFF;
+    }
+
+    @Override
+    public int readInt()
+            throws IOException
+    {
+        ensureAvailable(SIZE_OF_INT);
+        int v = slice.getInt(bufferPosition);
+        bufferPosition += SIZE_OF_INT;
+        return v;
+    }
+
+    /**
+     * Gets an unsigned 32-bit integer at the current {@code position}
+     * and increases the {@code position} by {@code 4} in this buffer.
+     *
+     * @throws EOFException if fewer than {@code 4} bytes remain in the stream
+     */
+    public long readUnsignedInt()
+            throws IOException
+    {
+        return readInt() & 0xFFFFFFFFL;
+    }
+
+    @Override
+    public long readLong()
+            throws IOException
+    {
+        ensureAvailable(SIZE_OF_LONG);
+        long v = slice.getLong(bufferPosition);
+        bufferPosition += SIZE_OF_LONG;
+        return v;
+    }
+
+    @Override
+    public float readFloat()
+            throws IOException
+    {
+        return Float.intBitsToFloat(readInt());
+    }
+
+    @Override
+    public double readDouble()
+            throws IOException
+    {
+        return Double.longBitsToDouble(readLong());
+    }
+
+    @Override
+    public int read()
+            throws IOException
+    {
+        if (available() == 0) {
+            return -1;
+        }
+
+        verify(availableBytes() > 0);
+        int v = slice.getByte(bufferPosition) & 0xFF;
+        bufferPosition += SIZE_OF_BYTE;
+        return v;
+    }
+
+    /**
+     * Skips up to {@code length} bytes, consuming buffered bytes first and delegating the
+     * remainder to the underlying stream.
+     *
+     * @return the number of bytes actually skipped, {@code 0} when {@code length} is not positive
+     */
+    @Override
+    public long skip(long length)
+            throws IOException
+    {
+        // a non-positive skip is a no-op; never move the buffer position backwards
+        if (length <= 0) {
+            return 0;
+        }
+
+        int availableBytes = availableBytes();
+        // is skip within the current buffer?
+        if (availableBytes >= length) {
+            bufferPosition += length;
+            return length;
+        }
+
+        // drop current buffer
+        bufferPosition = bufferFill;
+
+        // skip the rest in inputStream
+        long start = System.nanoTime();
+        long inputStreamSkip = inputStream.skip(length - availableBytes);
+        readTimeNanos += System.nanoTime() - start;
+        readBytes += inputStreamSkip;
+
+        bufferOffset += inputStreamSkip;
+        return availableBytes + inputStreamSkip;
+    }
+
+    @Override
+    public int read(byte[] destination)
+            throws IOException
+    {
+        return read(destination, 0, destination.length);
+    }
+
+    @Override
+    public int read(byte[] destination, int destinationIndex, int length)
+            throws IOException
+    {
+        // InputStream contract: a zero-length read returns 0, even at end of stream
+        if (length == 0) {
+            return 0;
+        }
+
+        if (available() == 0) {
+            return -1;
+        }
+
+        verify(availableBytes() > 0);
+        int batch = Math.min(availableBytes(), length);
+        slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+        bufferPosition += batch;
+        return batch;
+    }
+
+    @Override
+    public void readFully(byte[] destination)
+            throws IOException
+    {
+        readFully(destination, 0, destination.length);
+    }
+
+    @Override
+    public void readFully(byte[] destination, int destinationIndex, int length)
+            throws IOException
+    {
+        while (length > 0) {
+            int batch = Math.min(availableBytes(), length);
+            slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+
+            bufferPosition += batch;
+            destinationIndex += batch;
+            length -= batch;
+
+            ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+        }
+    }
+
+    public Slice readSlice(int length)
+            throws IOException
+    {
+        if (length == 0) {
+            return Slices.EMPTY_SLICE;
+        }
+
+        Slice newSlice = Slices.allocate(length);
+        readFully(newSlice, 0, length);
+        return newSlice;
+    }
+
+    public void readFully(Slice destination)
+            throws IOException
+    {
+        readFully(destination, 0, destination.length());
+    }
+
+    public void readFully(Slice destination, int destinationIndex, int length)
+            throws IOException
+    {
+        while (length > 0) {
+            int batch = Math.min(availableBytes(), length);
+            slice.getBytes(bufferPosition, destination, destinationIndex, batch);
+
+            bufferPosition += batch;
+            destinationIndex += batch;
+            length -= batch;
+
+            ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+        }
+    }
+
+    public void readFully(OutputStream out, int length)
+            throws IOException
+    {
+        while (length > 0) {
+            int batch = Math.min(availableBytes(), length);
+            out.write(buffer, bufferPosition, batch);
+
+            bufferPosition += batch;
+            length -= batch;
+
+            ensureAvailable(Math.min(length, MINIMUM_CHUNK_SIZE));
+        }
+    }
+
+    @Override
+    public void close()
+            throws IOException
+    {
+        inputStream.close();
+    }
+
+    public long getRetainedSize()
+    {
+        return INSTANCE_SIZE + sizeOf(buffer);
+    }
+
+    private int availableBytes()
+    {
+        return bufferFill - bufferPosition;
+    }
+
+    /**
+     * Ensures at least {@code size} bytes are buffered, refilling from the stream if needed.
+     *
+     * @throws EOFException if the stream ends before {@code size} bytes are available
+     */
+    private void ensureAvailable(int size)
+            throws IOException
+    {
+        if (bufferPosition + size <= bufferFill) {
+            return;
+        }
+
+        if (fillBuffer() < size) {
+            throw new EOFException("End of stream");
+        }
+    }
+
+    /**
+     * Compacts unread bytes to the start of the buffer and reads from the underlying stream
+     * until at least {@code MINIMUM_CHUNK_SIZE} bytes are buffered or the stream ends.
+     *
+     * @return the number of bytes available in the buffer
+     */
+    private int fillBuffer()
+            throws IOException
+    {
+        // Keep the rest
+        int rest = bufferFill - bufferPosition;
+        // Use System.arraycopy for small copies
+        System.arraycopy(buffer, bufferPosition, buffer, 0, rest);
+
+        bufferFill = rest;
+        bufferOffset += bufferPosition;
+        bufferPosition = 0;
+        // Fill buffer with a minimum of bytes
+        long start = System.nanoTime();
+        while (bufferFill < MINIMUM_CHUNK_SIZE) {
+            int bytesRead = inputStream.read(buffer, bufferFill, buffer.length - bufferFill);
+            if (bytesRead < 0) {
+                break;
+            }
+
+            readBytes += bytesRead;
+            bufferFill += bytesRead;
+        }
+        readTimeNanos += System.nanoTime() - start;
+
+        return bufferFill;
+    }
+
+    //
+    // Unsupported operations
+    //
+
+    @Override
+    @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod")
+    @Deprecated
+    public void mark(int readLimit)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod")
+    @Deprecated
+    public void reset()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Always {@code false}: {@link #mark(int)} and {@link #reset()} are not supported.
+     */
+    @Override
+    @Deprecated
+    public boolean markSupported()
+    {
+        return false;
+    }
+
+    @Override
+    @Deprecated
+    public char readChar()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    @Deprecated
+    public String readLine()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    @Deprecated
+    public String readUTF()
+    {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java
deleted file mode 100644
index 87a5702748a3..000000000000
--- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSource.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package io.trino.hive.formats.rcfile; - -import java.io.Closeable; -import java.io.IOException; - -public interface RcFileDataSource - extends Closeable -{ - RcFileDataSourceId getId(); - - long getReadBytes(); - - long getReadTimeNanos(); - - long getSize(); - - void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) - throws IOException; -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java deleted file mode 100644 index 6ce41fe852fe..000000000000 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDataSourceId.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.hive.formats.rcfile; - -import java.util.Objects; - -import static java.util.Objects.requireNonNull; - -public final class RcFileDataSourceId -{ - private final String id; - - public RcFileDataSourceId(String id) - { - this.id = requireNonNull(id, "id is null"); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RcFileDataSourceId that = (RcFileDataSourceId) o; - return Objects.equals(id, that.id); - } - - @Override - public int hashCode() - { - return Objects.hash(id); - } - - @Override - public String toString() - { - return id; - } -} diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java index 8366aa2a7f33..e044e658ce3b 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java @@ -17,6 +17,10 @@ import io.airlift.slice.SliceInput; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.hive.formats.DataOutputStream; +import io.trino.hive.formats.DataSeekableInputStream; import io.trino.spi.type.CharType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; @@ -67,6 +71,23 @@ public static boolean isNegativeVInt(byte value) return value < -120 || (value >= -112 && value < 0); } + public static long readVInt(DataSeekableInputStream in) + throws IOException + { + byte firstByte = in.readByte(); + int length = decodeVIntSize(firstByte); + if (length == 1) { + return firstByte; + } + + long value = 0; + for (int i = 1; i < length; i++) { + value <<= 8; + value |= (in.readByte() & 0xFF); + } + return isNegativeVInt(firstByte) ? 
~value : value; + } + public static long readVInt(SliceInput in) { byte firstByte = in.readByte(); @@ -116,13 +137,13 @@ private static long readVIntInternal(Slice slice, int start, int length) /** * Find the beginning of the first full sync sequence that starts within the specified range. */ - public static long findFirstSyncPosition(RcFileDataSource dataSource, long offset, long length, long syncFirst, long syncSecond) + public static long findFirstSyncPosition(TrinoInputFile inputFile, long offset, long length, long syncFirst, long syncSecond) throws IOException { - requireNonNull(dataSource, "dataSource is null"); + requireNonNull(inputFile, "inputFile is null"); checkArgument(offset >= 0, "offset is negative"); checkArgument(length >= 1, "length must be at least 1"); - checkArgument(offset + length <= dataSource.getSize(), "offset plus length is greater than data size"); + checkArgument(offset + length <= inputFile.length(), "offset plus length is greater than data size"); // The full sync sequence is "0xFFFFFFFF syncFirst syncSecond". 
If // this sequence begins the file range, the start position is returned @@ -138,36 +159,69 @@ public static long findFirstSyncPosition(RcFileDataSource dataSource, long offse // this causes a re-read of SYNC_SEQUENCE_LENGTH bytes each time, but is much simpler code byte[] buffer = new byte[toIntExact(min(1 << 22, length + (SYNC_SEQUENCE_LENGTH - 1)))]; Slice bufferSlice = Slices.wrappedBuffer(buffer); - for (long position = 0; position < length; position += bufferSlice.length() - (SYNC_SEQUENCE_LENGTH - 1)) { - // either fill the buffer entirely, or read enough to allow all bytes in offset + length to be a start sequence - int bufferSize = toIntExact(min(buffer.length, length + (SYNC_SEQUENCE_LENGTH - 1) - position)); - // don't read off the end of the file - bufferSize = toIntExact(min(bufferSize, dataSource.getSize() - offset - position)); - - dataSource.readFully(offset + position, buffer, 0, bufferSize); - - // find the starting index position of the sync sequence - int index = bufferSlice.indexOf(sync); - if (index >= 0) { - // If the starting position is before the end of the search region, return the - // absolute start position of the sequence. 
- if (position + index < length) { - long startOfSyncSequence = offset + position + index; - return startOfSyncSequence; + try (TrinoInput input = inputFile.newInput()) { + for (long position = 0; position < length; position += bufferSlice.length() - (SYNC_SEQUENCE_LENGTH - 1)) { + // either fill the buffer entirely, or read enough to allow all bytes in offset + length to be a start sequence + int bufferSize = toIntExact(min(buffer.length, length + (SYNC_SEQUENCE_LENGTH - 1) - position)); + // don't read off the end of the file + bufferSize = toIntExact(min(bufferSize, inputFile.length() - offset - position)); + + input.readFully(offset + position, buffer, 0, bufferSize); + + // find the starting index position of the sync sequence + int index = bufferSlice.indexOf(sync); + if (index >= 0) { + // If the starting position is before the end of the search region, return the + // absolute start position of the sequence. + if (position + index < length) { + long startOfSyncSequence = offset + position + index; + return startOfSyncSequence; + } + // Otherwise, this is not a match for this region + // Note: this case isn't strictly needed as the loop will exit, but it is + // simpler to explicitly call it out. + return -1; } - // Otherwise, this is not a match for this region - // Note: this case isn't strictly needed as the loop will exit, but it is - // simpler to explicitly call it out. 
- return -1; } } return -1; } - public static void writeLengthPrefixedString(SliceOutput out, Slice slice) + public static void writeLengthPrefixedString(DataOutputStream out, Slice slice) + throws IOException { writeVInt(out, slice.length()); - out.writeBytes(slice); + out.write(slice); + } + + public static void writeVInt(DataOutputStream out, int value) + throws IOException + { + if (value >= -112 && value <= 127) { + out.writeByte(value); + return; + } + + int length = -112; + if (value < 0) { + value ^= -1; // take one's complement' + length = -120; + } + + int tmp = value; + while (tmp != 0) { + tmp = tmp >> 8; + length--; + } + + out.writeByte(length); + + length = (length < -120) ? -(length + 120) : -(length + 112); + + for (int idx = length; idx != 0; idx--) { + int shiftBits = (idx - 1) * 8; + out.writeByte((value >> shiftBits) & 0xFF); + } } public static void writeVInt(SliceOutput out, int value) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 198046ac6c86..627bccb1c7c8 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -15,14 +15,10 @@ import com.google.common.collect.ImmutableMap; import io.airlift.slice.BasicSliceInput; -import io.airlift.slice.ChunkedSliceInput; -import io.airlift.slice.ChunkedSliceInput.BufferReference; -import io.airlift.slice.ChunkedSliceInput.SliceLoader; import io.airlift.slice.Slice; -import io.airlift.slice.SliceInput; import io.airlift.slice.Slices; -import io.airlift.units.DataSize; -import io.airlift.units.DataSize.Unit; +import io.trino.filesystem.TrinoInputFile; +import io.trino.hive.formats.DataSeekableInputStream; import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.compression.ValueDecompressor; import 
io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum; @@ -34,7 +30,6 @@ import java.io.Closeable; import java.io.IOException; -import java.io.UncheckedIOException; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -71,9 +66,10 @@ public class RcFileReader private static final String COLUMN_COUNT_METADATA_KEY = "hive.io.rcfile.column.number"; - private final RcFileDataSource dataSource; + private final String location; + private final long fileSize; private final Map readColumns; - private final ChunkedSliceInput input; + private final DataSeekableInputStream input; private final long length; private final byte version; @@ -105,47 +101,47 @@ public class RcFileReader private final Optional writeChecksumBuilder; public RcFileReader( - RcFileDataSource dataSource, + TrinoInputFile inputFile, RcFileEncoding encoding, Map readColumns, long offset, - long length, - DataSize bufferSize) + long length) throws IOException { - this(dataSource, encoding, readColumns, offset, length, bufferSize, Optional.empty()); + this(inputFile, encoding, readColumns, offset, length, Optional.empty()); } private RcFileReader( - RcFileDataSource dataSource, + TrinoInputFile inputFile, RcFileEncoding encoding, Map readColumns, long offset, long length, - DataSize bufferSize, Optional writeValidation) throws IOException { - this.dataSource = requireNonNull(dataSource, "dataSource is null"); + requireNonNull(inputFile, "inputFile is null"); + this.location = inputFile.location(); + this.fileSize = inputFile.length(); this.readColumns = ImmutableMap.copyOf(requireNonNull(readColumns, "readColumns is null")); - this.input = new ChunkedSliceInput(new DataSourceSliceLoader(dataSource), toIntExact(bufferSize.toBytes())); + this.input = new DataSeekableInputStream(inputFile.newInput().inputStream()); this.writeValidation = requireNonNull(writeValidation, "writeValidation is null"); this.writeChecksumBuilder = writeValidation.map(validation -> 
WriteChecksumBuilder.createWriteChecksumBuilder(readColumns)); verify(offset >= 0, "offset is negative"); - verify(offset < dataSource.getSize(), "offset is greater than data size"); + verify(offset < fileSize, "offset is greater than data size"); verify(length >= 1, "length must be at least 1"); this.length = length; this.end = offset + length; - verify(end <= dataSource.getSize(), "offset plus length is greater than data size"); + verify(end <= fileSize, "offset plus length is greater than data size"); // read header Slice magic = input.readSlice(RCFILE_MAGIC.length()); boolean compressed; if (RCFILE_MAGIC.equals(magic)) { version = input.readByte(); - verify(version <= CURRENT_VERSION, "RCFile version %s not supported: %s", version, dataSource); + verify(version <= CURRENT_VERSION, "RCFile version %s not supported: %s", version, inputFile.location()); validateWrite(validation -> validation.getVersion() == version, "Unexpected file version"); compressed = input.readBoolean(); } @@ -154,23 +150,23 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // first version of RCFile used magic SEQ with version 6 byte sequenceFileVersion = input.readByte(); - verify(sequenceFileVersion == SEQUENCE_FILE_VERSION, "File %s is a SequenceFile not an RCFile", dataSource); + verify(sequenceFileVersion == SEQUENCE_FILE_VERSION, "File %s is a SequenceFile not an RCFile", inputFile.location()); // this is the first version of RCFile this.version = FIRST_VERSION; Slice keyClassName = readLengthPrefixedString(input); Slice valueClassName = readLengthPrefixedString(input); - verify(RCFILE_KEY_BUFFER_NAME.equals(keyClassName) && RCFILE_VALUE_BUFFER_NAME.equals(valueClassName), "File %s is a SequenceFile not an RCFile", dataSource); + verify(RCFILE_KEY_BUFFER_NAME.equals(keyClassName) && RCFILE_VALUE_BUFFER_NAME.equals(valueClassName), "File %s is a SequenceFile not an RCFile", inputFile.location()); compressed = input.readBoolean(); // RC file is never block compressed if 
(input.readBoolean()) { - throw corrupt("File %s is a SequenceFile not an RCFile", dataSource); + throw corrupt("File %s is a SequenceFile not an RCFile", inputFile.location()); } } else { - throw corrupt("File %s is not an RCFile", dataSource); + throw corrupt("File %s is not an RCFile", inputFile.location()); } // setup the compression codec @@ -186,8 +182,8 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // read metadata int metadataEntries = Integer.reverseBytes(input.readInt()); - verify(metadataEntries >= 0, "Invalid metadata entry count %s in RCFile %s", metadataEntries, dataSource); - verify(metadataEntries <= MAX_METADATA_ENTRIES, "Too many metadata entries (%s) in RCFile %s", metadataEntries, dataSource); + verify(metadataEntries >= 0, "Invalid metadata entry count %s in RCFile %s", metadataEntries, inputFile.location()); + verify(metadataEntries <= MAX_METADATA_ENTRIES, "Too many metadata entries (%s) in RCFile %s", metadataEntries, inputFile.location()); ImmutableMap.Builder metadataBuilder = ImmutableMap.builder(); for (int i = 0; i < metadataEntries; i++) { metadataBuilder.put(readLengthPrefixedString(input).toStringUtf8(), readLengthPrefixedString(input).toStringUtf8()); @@ -197,16 +193,16 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // get column count from metadata String columnCountString = metadata.get(COLUMN_COUNT_METADATA_KEY); - verify(columnCountString != null, "Column count not specified in metadata RCFile %s", dataSource); + verify(columnCountString != null, "Column count not specified in metadata RCFile %s", inputFile.location()); try { columnCount = Integer.parseInt(columnCountString); } catch (NumberFormatException e) { - throw corrupt("Invalid column count %s in RCFile %s", columnCountString, dataSource); + throw corrupt("Invalid column count %s in RCFile %s", columnCountString, inputFile.location()); } // initialize columns - verify(columnCount <= MAX_COLUMN_COUNT, "Too many columns (%s) in RCFile %s", columnCountString, 
dataSource); + verify(columnCount <= MAX_COLUMN_COUNT, "Too many columns (%s) in RCFile %s", columnCountString, inputFile.location()); columns = new Column[columnCount]; for (Entry entry : readColumns.entrySet()) { if (entry.getKey() < columnCount) { @@ -225,7 +221,12 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // of the file. In that case, the reader owns all row groups up to the first sync point. if (offset != 0) { // if the specified file region does not contain the start of a sync sequence, this call will close the reader - seekToFirstRowGroupInRange(offset, length); + long startOfSyncSequence = RcFileDecoderUtils.findFirstSyncPosition(inputFile, offset, length, syncFirst, syncSecond); + if (startOfSyncSequence < 0) { + closeQuietly(); + return; + } + input.seek(startOfSyncSequence); } } @@ -251,7 +252,7 @@ public long getLength() public long getBytesRead() { - return dataSource.getReadBytes(); + return input.getReadBytes(); } public long getRowsRead() @@ -261,7 +262,7 @@ public long getRowsRead() public long getReadTimeNanos() { - return dataSource.getReadTimeNanos(); + return input.getReadTimeNanos(); } public Slice getSync() @@ -320,18 +321,18 @@ public int advance() } // are we at the end? - if (input.remaining() == 0) { + if (fileSize - input.getPos() == 0) { close(); return -1; } // read uncompressed size of row group (which is useless information) - verify(input.remaining() >= SIZE_OF_INT, "RCFile truncated %s", dataSource.getId()); + verify(fileSize - input.getPos() >= SIZE_OF_INT, "RCFile truncated %s", location); int unusedRowGroupSize = Integer.reverseBytes(input.readInt()); // read sequence sync if present if (unusedRowGroupSize == -1) { - verify(input.remaining() >= SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT, "RCFile truncated %s", dataSource.getId()); + verify(fileSize - input.getPos() >= SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT, "RCFile truncated %s", location); // The full sync sequence is "0xFFFFFFFF syncFirst syncSecond". 
If // this sequence begins in our segment, we must continue process until the @@ -339,12 +340,12 @@ public int advance() // We have already read the 0xFFFFFFFF above, so we must test the // end condition back 4 bytes. // NOTE: this decision must agree with RcFileDecoderUtils.findFirstSyncPosition - if (input.position() - SIZE_OF_INT >= end) { + if (input.getPos() - SIZE_OF_INT >= end) { close(); return -1; } - verify(syncFirst == input.readLong() && syncSecond == input.readLong(), "Invalid sync in RCFile %s", dataSource.getId()); + verify(syncFirst == input.readLong() && syncSecond == input.readLong(), "Invalid sync in RCFile %s", location); // read the useless uncompressed length unusedRowGroupSize = Integer.reverseBytes(input.readInt()); @@ -362,7 +363,7 @@ else if (rowsRead > 0) { } // use exact sized compressed header to avoid problems where compression algorithms over read Slice compressedHeader = compressedHeaderBuffer.slice(0, compressedHeaderSize); - input.readBytes(compressedHeader); + input.readFully(compressedHeader); // decompress row group header Slice header; @@ -377,7 +378,7 @@ else if (rowsRead > 0) { header = buffer; } else { - verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", dataSource.getId()); + verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", location); header = compressedHeader; } BasicSliceInput headerInput = header.getInput(); @@ -395,7 +396,7 @@ else if (rowsRead > 0) { totalCompressedDataSize += compressedDataSize; int uncompressedDataSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); if (decompressor == null && compressedDataSize != uncompressedDataSize) { - throw corrupt("Invalid RCFile %s", dataSource.getId()); + throw corrupt("Invalid RCFile %s", location); } int lengthsSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); @@ -434,20 +435,9 @@ public Block readBlock(int columnIndex) return columns[columnIndex].readBlock(rowGroupPosition, currentChunkRowCount); } - 
public RcFileDataSourceId getId() + public String getFileLocation() { - return dataSource.getId(); - } - - private void seekToFirstRowGroupInRange(long offset, long length) - throws IOException - { - long startOfSyncSequence = RcFileDecoderUtils.findFirstSyncPosition(dataSource, offset, length, syncFirst, syncSecond); - if (startOfSyncSequence < 0) { - closeQuietly(); - return; - } - input.setPosition(startOfSyncSequence); + return location; } private void closeQuietly() @@ -459,8 +449,8 @@ private void closeQuietly() } } - private Slice readLengthPrefixedString(SliceInput in) - throws RcFileCorruptionException + private Slice readLengthPrefixedString(DataSeekableInputStream in) + throws IOException { int length = toIntExact(RcFileDecoderUtils.readVInt(in)); verify(length <= MAX_METADATA_STRING_LENGTH, "Metadata string value is too long (%s) in RCFile %s", length, in); @@ -508,7 +498,7 @@ private void validateWritePageChecksum() static void validateFile( RcFileWriteValidation writeValidation, - RcFileDataSource input, + TrinoInputFile inputFile, RcFileEncoding encoding, List types) throws RcFileCorruptionException @@ -518,12 +508,11 @@ static void validateFile( readTypes.put(columnIndex, types.get(columnIndex)); } try (RcFileReader rcFileReader = new RcFileReader( - input, + inputFile, encoding, readTypes.buildOrThrow(), 0, - input.getSize(), - DataSize.of(8, Unit.MEGABYTE), + inputFile.length(), Optional.of(writeValidation))) { while (rcFileReader.advance() >= 0) { // ignored @@ -659,73 +648,4 @@ private Slice getDataBuffer() return dataBuffer; } } - - private static class DataSourceSliceLoader - implements SliceLoader - { - private final RcFileDataSource dataSource; - - public DataSourceSliceLoader(RcFileDataSource dataSource) - { - this.dataSource = dataSource; - } - - @Override - public ByteArrayBufferReference createBuffer(int bufferSize) - { - return new ByteArrayBufferReference(bufferSize); - } - - @Override - public long getSize() - { - return 
dataSource.getSize(); - } - - @Override - public void load(long position, ByteArrayBufferReference bufferReference, int length) - { - try { - dataSource.readFully(position, bufferReference.getByteBuffer(), 0, length); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Override - public void close() - { - try { - dataSource.close(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - } - - private static class ByteArrayBufferReference - implements BufferReference - { - private final byte[] byteBuffer; - private final Slice sliceBuffer; - - public ByteArrayBufferReference(int size) - { - byteBuffer = new byte[size]; - sliceBuffer = Slices.wrappedBuffer(byteBuffer); - } - - public byte[] getByteBuffer() - { - return byteBuffer; - } - - @Override - public Slice getSlice() - { - return sliceBuffer; - } - } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index fd37bbf31679..3e93e68784cc 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -18,6 +18,8 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; import io.airlift.units.DataSize; +import io.trino.filesystem.TrinoInputFile; +import io.trino.hive.formats.DataOutputStream; import io.trino.hive.formats.compression.Codec; import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.compression.MemoryCompressedSliceOutput; @@ -31,6 +33,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.OutputStream; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -69,7 +72,7 @@ public class RcFileWriter PRESTO_RCFILE_WRITER_VERSION = version == null ? 
"UNKNOWN" : version; } - private final SliceOutput output; + private final DataOutputStream output; private final List types; private final RcFileEncoding encoding; @@ -91,7 +94,7 @@ public class RcFileWriter private final RcFileWriteValidationBuilder validationBuilder; public RcFileWriter( - SliceOutput output, + OutputStream rawOutput, List types, RcFileEncoding encoding, Optional compressionKind, @@ -100,7 +103,7 @@ public RcFileWriter( throws IOException { this( - output, + rawOutput, types, encoding, compressionKind, @@ -111,7 +114,7 @@ public RcFileWriter( } public RcFileWriter( - SliceOutput output, + OutputStream rawOutput, List types, RcFileEncoding encoding, Optional compressionKind, @@ -121,7 +124,7 @@ public RcFileWriter( boolean validate) throws IOException { - requireNonNull(output, "output is null"); + requireNonNull(rawOutput, "rawOutput is null"); requireNonNull(types, "types is null"); checkArgument(!types.isEmpty(), "types is empty"); requireNonNull(encoding, "encoding is null"); @@ -135,18 +138,20 @@ public RcFileWriter( this.validationBuilder = validate ? 
new RcFileWriteValidationBuilder(types) : null; - this.output = output; + this.output = new DataOutputStream(rawOutput); this.types = types; this.encoding = encoding; // write header - output.writeBytes(RCFILE_MAGIC); + output.write(RCFILE_MAGIC); output.writeByte(CURRENT_VERSION); recordValidation(validation -> validation.setVersion((byte) CURRENT_VERSION)); // write codec information output.writeBoolean(compressionKind.isPresent()); - compressionKind.map(CompressionKind::getHadoopClassName).ifPresent(name -> writeLengthPrefixedString(output, utf8Slice(name))); + if (compressionKind.isPresent()) { + writeLengthPrefixedString(output, utf8Slice(compressionKind.get().getHadoopClassName())); + } recordValidation(validation -> validation.setCodecClassName(compressionKind.map(CompressionKind::getHadoopClassName))); // write metadata @@ -178,6 +183,7 @@ public RcFileWriter( } private void writeMetadataProperty(String key, String value) + throws IOException { writeLengthPrefixedString(output, utf8Slice(key)); writeLengthPrefixedString(output, utf8Slice(value)); @@ -205,13 +211,13 @@ private void recordValidation(Consumer task) } } - public void validate(RcFileDataSource input) + public void validate(TrinoInputFile inputFile) throws RcFileCorruptionException { checkState(validationBuilder != null, "validation is not enabled"); validateFile( validationBuilder.build(), - input, + inputFile, encoding, types); } @@ -304,14 +310,14 @@ private void writeRowGroup() output.writeInt(Integer.reverseBytes(keySectionOutput.size())); output.writeInt(Integer.reverseBytes(keySectionOutput.getCompressedSize())); for (Slice slice : keySectionOutput.getCompressedSlices()) { - output.writeBytes(slice); + output.write(slice); } // write value section for (ColumnEncoder columnEncoder : columnEncoders) { List slices = columnEncoder.getCompressedData(); for (Slice slice : slices) { - output.writeBytes(slice); + output.write(slice); } columnEncoder.reset(); } diff --git 
a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java new file mode 100644 index 000000000000..290e5a709714 --- /dev/null +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataOutputStream.java @@ -0,0 +1,234 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats; + +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.concurrent.ThreadLocalRandom; + +import static org.testng.Assert.assertEquals; + +public class TestDataOutputStream +{ + @Test + public void testEncodingBoolean() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeBoolean(true), + new byte[] {1}); + assertEncoding(sliceOutput -> sliceOutput.writeBoolean(false), + new byte[] {0}); + } + + @Test + public void testEncodingByte() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeByte(92), + new byte[] {92}); + assertEncoding(sliceOutput -> sliceOutput.writeByte(156), + new byte[] {-100}); + assertEncoding(sliceOutput -> sliceOutput.writeByte(-17), + new byte[] {-17}); + + assertEncoding(sliceOutput -> sliceOutput.write(92), + new byte[] {92}); + assertEncoding(sliceOutput -> 
sliceOutput.write(156), + new byte[] {-100}); + assertEncoding(sliceOutput -> sliceOutput.write(-17), + new byte[] {-17}); + } + + @Test + public void testEncodingShort() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeShort(23661), + new byte[] {109, 92}); + assertEncoding(sliceOutput -> sliceOutput.writeShort(40045), + new byte[] {109, -100}); + assertEncoding(sliceOutput -> sliceOutput.writeShort(-27188), + new byte[] {-52, -107}); + } + + @Test + public void testEncodingInteger() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeInt(978017389), + new byte[] {109, 92, 75, 58}); + assertEncoding(sliceOutput -> sliceOutput.writeInt(-7813904), + new byte[] {-16, -60, -120, -1}); + } + + @Test + public void testEncodingLong() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeLong(9214541725452766769L), + new byte[] {49, -114, -96, -23, -32, -96, -32, 127}); + assertEncoding(sliceOutput -> sliceOutput.writeLong(-1184314682315678611L), + new byte[] {109, 92, 75, 58, 18, 120, -112, -17}); + } + + @Test + public void testEncodingDouble() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeDouble(3.14), + new byte[] {31, -123, -21, 81, -72, 30, 9, 64}); + assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.NaN), + new byte[] {0, 0, 0, 0, 0, 0, -8, 127}); + assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.NEGATIVE_INFINITY), + new byte[] {0, 0, 0, 0, 0, 0, -16, -1}); + assertEncoding(sliceOutput -> sliceOutput.writeDouble(Double.POSITIVE_INFINITY), + new byte[] {0, 0, 0, 0, 0, 0, -16, 127}); + } + + @Test + public void testEncodingFloat() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeFloat(3.14f), + new byte[] {-61, -11, 72, 64}); + assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.NaN), + new byte[] {0, 0, -64, 127}); + assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.NEGATIVE_INFINITY), + new byte[] {0, 0, -128, -1}); 
+ assertEncoding(sliceOutput -> sliceOutput.writeFloat(Float.POSITIVE_INFINITY), + new byte[] {0, 0, -128, 127}); + } + + @Test + public void testEncodingBytes() + throws Exception + { + byte[] data = new byte[18000]; + ThreadLocalRandom.current().nextBytes(data); + + assertEncoding(sliceOutput -> sliceOutput.write(data), data); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 0), Arrays.copyOfRange(data, 0, 0)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 3), Arrays.copyOfRange(data, 0, 3)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 370), Arrays.copyOfRange(data, 0, 370)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 4095), Arrays.copyOfRange(data, 0, 4095)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 4096), Arrays.copyOfRange(data, 0, 4096)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 12348), Arrays.copyOfRange(data, 0, 12348)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 16384), Arrays.copyOfRange(data, 0, 16384)); + assertEncoding(sliceOutput -> sliceOutput.write(data, 0, 18000), Arrays.copyOfRange(data, 0, 18000)); + } + + @Test + public void testEncodingSlice() + throws Exception + { + byte[] data = new byte[18000]; + ThreadLocalRandom.current().nextBytes(data); + Slice slice = Slices.wrappedBuffer(data); + + assertEncoding(sliceOutput -> sliceOutput.write(slice), data); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 0), Arrays.copyOfRange(data, 0, 0)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 3), Arrays.copyOfRange(data, 0, 3)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 370), Arrays.copyOfRange(data, 0, 370)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 4095), Arrays.copyOfRange(data, 0, 4095)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 4096), Arrays.copyOfRange(data, 0, 4096)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 12348), Arrays.copyOfRange(data, 0, 
12348)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 16384), Arrays.copyOfRange(data, 0, 16384)); + assertEncoding(sliceOutput -> sliceOutput.write(slice, 0, 18000), Arrays.copyOfRange(data, 0, 18000)); + } + + @Test + public void testWriteZero() + throws Exception + { + assertEncoding(sliceOutput -> sliceOutput.writeZero(0), new byte[0]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(1), new byte[1]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(2), new byte[2]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(3), new byte[3]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(4), new byte[4]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(6), new byte[6]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(7), new byte[7]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(8), new byte[8]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(9), new byte[9]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(16), new byte[16]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(22), new byte[22]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(227), new byte[227]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(4227), new byte[4227]); + assertEncoding(sliceOutput -> sliceOutput.writeZero(18349), new byte[18349]); + } + + @Test + public void testRetainedSize() + throws IOException + { + int bufferSize = 1337; + DataOutputStream output = new DataOutputStream(new ByteArrayOutputStream(0), bufferSize); + + long originalRetainedSize = output.getRetainedSize(); + assertEquals(originalRetainedSize, ClassLayout.parseClass(DataOutputStream.class).instanceSize() + Slices.allocate(bufferSize).getRetainedSize()); + output.writeLong(0); + output.writeShort(0); + assertEquals(output.getRetainedSize(), originalRetainedSize); + } + + /** + * Asserting different offsets of operations. + */ + private static void assertEncoding(DataOutputTester operations, byte... 
expected) + throws IOException + { + assertEncoding(operations, 0, expected); + assertEncoding(operations, 1, expected); + assertEncoding(operations, 2, expected); + assertEncoding(operations, 3, expected); + assertEncoding(operations, 4, expected); + assertEncoding(operations, 7, expected); + assertEncoding(operations, 8, expected); + assertEncoding(operations, 16, expected); + assertEncoding(operations, 511, expected); + assertEncoding(operations, 12000, expected); + assertEncoding(operations, 13000, expected); + assertEncoding(operations, 16000, expected); + assertEncoding(operations, 16380, expected); + assertEncoding(operations, 16383, expected); + assertEncoding(operations, 16384, expected); + assertEncoding(operations, 18349, expected); + } + + private static void assertEncoding(DataOutputTester operations, int offset, byte... output) + throws IOException + { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream, 16384)) { + dataOutputStream.writeZero(offset); + operations.test(dataOutputStream); + assertEquals(dataOutputStream.longSize(), offset + output.length); + } + + byte[] expected = new byte[offset + output.length]; + System.arraycopy(output, 0, expected, offset, output.length); + assertEquals(byteArrayOutputStream.toByteArray(), expected); + } + + private interface DataOutputTester + { + void test(DataOutputStream dataOutputStream) + throws IOException; + } +} diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java new file mode 100644 index 000000000000..8b91d6c820a3 --- /dev/null +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestDataSeekableInputStream.java @@ -0,0 +1,733 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except 
in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.hive.formats; + +import com.google.common.collect.ImmutableList; +import com.google.common.io.ByteSource; +import com.google.common.io.ByteStreams; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import io.trino.filesystem.memory.MemorySeekableInputStream; +import org.apache.iceberg.io.SeekableInputStream; +import org.openjdk.jol.info.ClassLayout; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.util.List; + +import static com.google.common.collect.Iterables.cycle; +import static io.airlift.slice.SizeOf.SIZE_OF_BYTE; +import static io.airlift.slice.SizeOf.SIZE_OF_DOUBLE; +import static io.airlift.slice.SizeOf.SIZE_OF_FLOAT; +import static io.airlift.slice.SizeOf.SIZE_OF_INT; +import static io.airlift.slice.SizeOf.SIZE_OF_LONG; +import static io.airlift.slice.SizeOf.SIZE_OF_SHORT; +import static io.airlift.slice.SizeOf.sizeOfByteArray; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +@SuppressWarnings("resource") +public class TestDataSeekableInputStream +{ + private static final int BUFFER_SIZE = 129; + + private static final List VARIABLE_READ_SIZES = ImmutableList.of( + 1, + 7, + 15, + BUFFER_SIZE - 1, + BUFFER_SIZE, + BUFFER_SIZE + 1, + BUFFER_SIZE + 13); + + @Test + public void 
testReadBoolean() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_BYTE) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeBoolean(valueIndex % 2 == 0); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readBoolean(), valueIndex % 2 == 0); + } + }); + } + + @Test + public void testReadByte() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_BYTE) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeByte((byte) valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readByte(), (byte) valueIndex); + } + }); + } + + @Test + public void testRead() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_BYTE) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeByte((byte) valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.read(), valueIndex & 0xFF); + } + + @Override + public void verifyReadOffEnd(DataSeekableInputStream input) + throws IOException + { + assertEquals(input.read(), -1); + } + }); + } + + @Test + public void testReadShort() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_SHORT) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeShort(valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readShort(), (short) valueIndex); + } + }); + } + + @Test + public void testReadUnsignedShort() + throws IOException + { + testDataInput(new 
DataInputTester(SIZE_OF_SHORT) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeShort(valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readUnsignedShort(), valueIndex & 0xFFFF); + } + }); + } + + @Test + public void testReadInt() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_INT) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeInt(valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readInt(), valueIndex); + } + }); + } + + @Test + public void testUnsignedReadInt() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_INT) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeInt(valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readUnsignedInt(), valueIndex); + } + }); + } + + @Test + public void testReadLong() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_LONG) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeLong(valueIndex); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readLong(), valueIndex); + } + }); + } + + @Test + public void testReadFloat() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_FLOAT) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeFloat(valueIndex + 0.12f); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int
valueIndex) + throws IOException + { + assertEquals(input.readFloat(), valueIndex + 0.12f); + } + }); + } + + @Test + public void testReadDouble() + throws IOException + { + testDataInput(new DataInputTester(SIZE_OF_DOUBLE) + { + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.writeDouble(valueIndex + 0.12); + } + + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + assertEquals(input.readDouble(), valueIndex + 0.12); + } + }); + } + + @Test + public void testSkip() + throws IOException + { + for (int readSize : VARIABLE_READ_SIZES) { + // skip without any reads + testDataInput(new SkipDataInputTester(readSize) + { + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + input.skip(valueSize()); + } + + @Override + public void verifyReadOffEnd(DataSeekableInputStream input) + throws IOException + { + assertEquals(input.skip(valueSize()), valueSize() - 1); + } + }); + testDataInput(new SkipDataInputTester(readSize) + { + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + input.skipBytes(valueSize()); + } + + @Override + public void verifyReadOffEnd(DataSeekableInputStream input) + throws IOException + { + assertEquals(input.skip(valueSize()), valueSize() - 1); + } + }); + + // read when no data available to force buffering + testDataInput(new SkipDataInputTester(readSize) + { + @Override + public void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + int length = valueSize(); + while (length > 0) { + if (input.available() == 0) { + input.readByte(); + length--; + } + int skipSize = input.skipBytes(length); + length -= skipSize; + } + assertEquals(input.skip(0), 0); + } + }); + testDataInput(new SkipDataInputTester(readSize) + { + @Override + public void verifyValue(DataSeekableInputStream input, int 
valueIndex) + throws IOException + { + long length = valueSize(); + while (length > 0) { + if (input.available() == 0) { + input.readByte(); + length--; + } + long skipSize = input.skip(length); + length -= skipSize; + } + assertEquals(input.skip(0), 0); + } + }); + } + } + + @Test + public void testReadSlice() + throws IOException + { + for (int readSize : VARIABLE_READ_SIZES) { + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + return input.readSlice(valueSize()).toStringUtf8(); + } + }); + } + } + + @Test + public void testReadFully() + throws IOException + { + for (int readSize : VARIABLE_READ_SIZES) { + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + Slice slice = Slices.allocate(valueSize()); + input.readFully(slice); + return slice.toStringUtf8(); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + Slice slice = Slices.allocate(valueSize() + 10); + input.readFully(slice, 5, valueSize()); + return slice.slice(5, valueSize()).toStringUtf8(); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + byte[] bytes = new byte[valueSize()]; + input.readFully(bytes, 0, valueSize()); + return new String(bytes, 0, valueSize(), UTF_8); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + byte[] bytes = new byte[valueSize() + 10]; + input.readFully(bytes, 5, valueSize()); + return new String(bytes, 5, valueSize(), UTF_8); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream 
input) + throws IOException + { + byte[] bytes = new byte[valueSize()]; + int bytesRead = input.read(bytes); + if (bytesRead == -1) { + throw new EOFException(); + } + assertTrue(bytesRead > 0, "Expected to read at least one byte"); + input.readFully(bytes, bytesRead, bytes.length - bytesRead); + return new String(bytes, 0, valueSize(), UTF_8); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + byte[] bytes = new byte[valueSize() + 10]; + ByteStreams.readFully(input, bytes, 5, valueSize()); + return new String(bytes, 5, valueSize(), UTF_8); + } + }); + testDataInput(new StringDataInputTester(readSize) + { + @Override + public String readActual(DataSeekableInputStream input) + throws IOException + { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + input.readFully(out, valueSize()); + return out.toString(UTF_8); + } + }); + } + } + + @Test + public void testEmptyInput() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]); + assertEquals(input.getPos(), 0); + } + + @Test + public void testEmptyRead() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]); + assertEquals(input.read(), -1); + } + + @Test(expectedExceptions = EOFException.class) + public void testReadByteBeyondEnd() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[0]); + input.readByte(); + } + + @Test(expectedExceptions = EOFException.class) + public void testReadShortBeyondEnd() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[1]); + input.readShort(); + } + + @Test(expectedExceptions = EOFException.class) + public void testReadIntBeyondEnd() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[3]); + input.readInt(); + } + + @Test(expectedExceptions = 
EOFException.class) + public void testReadLongBeyondEnd() + throws Exception + { + DataSeekableInputStream input = createDataSeekableInputStream(new byte[7]); + input.readLong(); + } + + @Test + public void testEncodingBoolean() + throws Exception + { + assertTrue(createDataSeekableInputStream(new byte[] {1}).readBoolean()); + assertFalse(createDataSeekableInputStream(new byte[] {0}).readBoolean()); + } + + @Test + public void testEncodingByte() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {92}).readByte(), 92); + assertEquals(createDataSeekableInputStream(new byte[] {-100}).readByte(), -100); + assertEquals(createDataSeekableInputStream(new byte[] {-17}).readByte(), -17); + + assertEquals(createDataSeekableInputStream(new byte[] {92}).readUnsignedByte(), 92); + assertEquals(createDataSeekableInputStream(new byte[] {-100}).readUnsignedByte(), 156); + assertEquals(createDataSeekableInputStream(new byte[] {-17}).readUnsignedByte(), 239); + } + + @Test + public void testEncodingShort() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {109, 92}).readShort(), 23661); + assertEquals(createDataSeekableInputStream(new byte[] {109, -100}).readShort(), -25491); + assertEquals(createDataSeekableInputStream(new byte[] {-52, -107}).readShort(), -27188); + + assertEquals(createDataSeekableInputStream(new byte[] {109, -100}).readUnsignedShort(), 40045); + assertEquals(createDataSeekableInputStream(new byte[] {-52, -107}).readUnsignedShort(), 38348); + } + + @Test + public void testEncodingInteger() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {109, 92, 75, 58}).readInt(), 978017389); + assertEquals(createDataSeekableInputStream(new byte[] {-16, -60, -120, -1}).readInt(), -7813904); + } + + @Test + public void testEncodingLong() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {49, -114, -96, -23, -32, -96, -32, 127}).readLong(), 9214541725452766769L); + 
assertEquals(createDataSeekableInputStream(new byte[] {109, 92, 75, 58, 18, 120, -112, -17}).readLong(), -1184314682315678611L); + } + + @Test + public void testEncodingDouble() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {31, -123, -21, 81, -72, 30, 9, 64}).readDouble(), 3.14); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -8, 127}).readDouble(), Double.NaN); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -16, -1}).readDouble(), Double.NEGATIVE_INFINITY); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, 0, 0, 0, 0, -16, 127}).readDouble(), Double.POSITIVE_INFINITY); + } + + @Test + public void testEncodingFloat() + throws Exception + { + assertEquals(createDataSeekableInputStream(new byte[] {-61, -11, 72, 64}).readFloat(), 3.14f); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -64, 127}).readFloat(), Float.NaN); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -128, -1}).readFloat(), Float.NEGATIVE_INFINITY); + assertEquals(createDataSeekableInputStream(new byte[] {0, 0, -128, 127}).readFloat(), Float.POSITIVE_INFINITY); + } + + @Test + public void testRetainedSize() + { + int bufferSize = 1024; + SeekableInputStream inputStream = new MemorySeekableInputStream(Slices.wrappedBuffer(new byte[] {0, 1})); + DataSeekableInputStream input = new DataSeekableInputStream(inputStream, bufferSize); + assertEquals(input.getRetainedSize(), ClassLayout.parseClass(DataSeekableInputStream.class).instanceSize() + sizeOfByteArray(bufferSize)); + } + + private static void testDataInput(DataInputTester tester) + throws IOException + { + int size = (BUFFER_SIZE * 3) + 10; + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(size); + try (DataOutputStream output = new DataOutputStream(byteArrayOutputStream)) { + for (int i = 0; i < size / tester.valueSize(); i++) { + tester.loadValue(output, i); + } + } + byte[] bytes = 
byteArrayOutputStream.toByteArray(); + + testReadForward(tester, bytes); + testReadReverse(tester, bytes); + testReadOffEnd(tester, bytes); + } + + private static void testReadForward(DataInputTester tester, byte[] bytes) + throws IOException + { + DataSeekableInputStream input = createDataSeekableInputStream(bytes); + for (int i = 0; i < bytes.length / tester.valueSize(); i++) { + int position = i * tester.valueSize(); + assertEquals(input.getPos(), position); + tester.verifyValue(input, i); + } + } + + private static void testReadReverse(DataInputTester tester, byte[] bytes) + throws IOException + { + DataSeekableInputStream input = createDataSeekableInputStream(bytes); + for (int i = bytes.length / tester.valueSize() - 1; i >= 0; i--) { + int position = i * tester.valueSize(); + input.seek(position); + assertEquals(input.getPos(), position); + tester.verifyValue(input, i); + } + } + + private static void testReadOffEnd(DataInputTester tester, byte[] bytes) + throws IOException + { + DataSeekableInputStream input = createDataSeekableInputStream(bytes); + ByteStreams.skipFully(input, bytes.length - tester.valueSize() + 1); + tester.verifyReadOffEnd(input); + } + + private static String getExpectedStringValue(int index, int size) + throws IOException + { + return ByteSource.concat(cycle(ByteSource.wrap(String.valueOf(index).getBytes(UTF_8)))).slice(0, size).asCharSource(UTF_8).read(); + } + + protected abstract static class DataInputTester + { + private final int size; + + public DataInputTester(int size) + { + this.size = size; + } + + public final int valueSize() + { + return size; + } + + public abstract void loadValue(DataOutputStream slice, int valueIndex) + throws IOException; + + public abstract void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException; + + public void verifyReadOffEnd(DataSeekableInputStream input) + throws IOException + { + try { + verifyValue(input, 1); + fail("expected EOFException"); + } + catch (EOFException 
expected) { + } + } + } + + private abstract static class SkipDataInputTester + extends DataInputTester + { + public SkipDataInputTester(int size) + { + super(size); + } + + @Override + public void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.write(new byte[valueSize()]); + } + } + + private abstract static class StringDataInputTester + extends DataInputTester + { + public StringDataInputTester(int size) + { + super(size); + } + + @Override + public final void loadValue(DataOutputStream output, int valueIndex) + throws IOException + { + output.write(getExpectedStringValue(valueIndex, valueSize()).getBytes(UTF_8)); + } + + @Override + public final void verifyValue(DataSeekableInputStream input, int valueIndex) + throws IOException + { + String actual = readActual(input); + String expected = getExpectedStringValue(valueIndex, valueSize()); + assertEquals(actual, expected); + } + + protected abstract String readActual(DataSeekableInputStream input) + throws IOException; + } + + private static DataSeekableInputStream createDataSeekableInputStream(byte[] bytes) + { + SeekableInputStream inputStream = new MemorySeekableInputStream(Slices.wrappedBuffer(bytes)); + return new DataSeekableInputStream(inputStream, 16 * 1024); + } +} diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java index 12481ba5dd48..ca062f00f6fd 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java @@ -19,10 +19,11 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import io.airlift.slice.OutputStreamSliceOutput; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.airlift.units.DataSize; +import 
io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.local.LocalInputFile; import io.trino.hadoop.HadoopNative; import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; @@ -116,7 +117,6 @@ import static io.airlift.slice.SizeOf.SIZE_OF_INT; import static io.airlift.slice.SizeOf.SIZE_OF_LONG; import static io.airlift.units.DataSize.Unit.KILOBYTE; -import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.findFirstSyncPosition; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.BZIP2; @@ -567,21 +567,20 @@ private static List getSyncPositionsSimple(RcFileReader recordReader, File long syncFirst = sync.getLong(0); long syncSecond = sync.getLong(8); long syncPosition = 0; - try (RcFileDataSource dataSource = new FileRcFileDataSource(file)) { - while (syncPosition >= 0) { - syncPosition = findFirstSyncPosition(dataSource, syncPosition, file.length() - syncPosition, syncFirst, syncSecond); - if (syncPosition > 0) { - assertEquals(findFirstSyncPosition(dataSource, syncPosition, 1, syncFirst, syncSecond), syncPosition); - assertEquals(findFirstSyncPosition(dataSource, syncPosition, 2, syncFirst, syncSecond), syncPosition); - assertEquals(findFirstSyncPosition(dataSource, syncPosition, 10, syncFirst, syncSecond), syncPosition); - - assertEquals(findFirstSyncPosition(dataSource, syncPosition - 1, 1, syncFirst, syncSecond), -1); - assertEquals(findFirstSyncPosition(dataSource, syncPosition - 2, 2, syncFirst, syncSecond), -1); - assertEquals(findFirstSyncPosition(dataSource, syncPosition + 1, 1, syncFirst, syncSecond), -1); - - syncPositions.add(syncPosition); - syncPosition++; - } + TrinoInputFile inputFile = new LocalInputFile(file); + while (syncPosition >= 0) { + syncPosition = findFirstSyncPosition(inputFile, syncPosition, file.length() - 
syncPosition, syncFirst, syncSecond); + if (syncPosition > 0) { + assertEquals(findFirstSyncPosition(inputFile, syncPosition, 1, syncFirst, syncSecond), syncPosition); + assertEquals(findFirstSyncPosition(inputFile, syncPosition, 2, syncFirst, syncSecond), syncPosition); + assertEquals(findFirstSyncPosition(inputFile, syncPosition, 10, syncFirst, syncSecond), syncPosition); + + assertEquals(findFirstSyncPosition(inputFile, syncPosition - 1, 1, syncFirst, syncSecond), -1); + assertEquals(findFirstSyncPosition(inputFile, syncPosition - 2, 2, syncFirst, syncSecond), -1); + assertEquals(findFirstSyncPosition(inputFile, syncPosition + 1, 1, syncFirst, syncSecond), -1); + + syncPositions.add(syncPosition); + syncPosition++; } } return syncPositions; @@ -590,14 +589,13 @@ private static List getSyncPositionsSimple(RcFileReader recordReader, File private static RcFileReader createRcFileReader(TempFile tempFile, Type type, RcFileEncoding encoding) throws IOException { - RcFileDataSource rcFileDataSource = new FileRcFileDataSource(tempFile.getFile()); + TrinoInputFile rcFileDataSource = new LocalInputFile(tempFile.getFile()); RcFileReader rcFileReader = new RcFileReader( rcFileDataSource, encoding, ImmutableMap.of(0, type), 0, - tempFile.getFile().length(), - DataSize.of(8, MEGABYTE)); + tempFile.getFile().length()); assertEquals(rcFileReader.getColumnCount(), 1); @@ -607,9 +605,8 @@ private static RcFileReader createRcFileReader(TempFile tempFile, Type type, RcF private static DataSize writeRcFileColumnNew(File outputFile, Format format, Compression compression, Type type, Iterator values, Map metadata) throws Exception { - OutputStreamSliceOutput output = new OutputStreamSliceOutput(new FileOutputStream(outputFile)); RcFileWriter writer = new RcFileWriter( - output, + new FileOutputStream(outputFile), ImmutableList.of(type), format.getVectorEncoding(), compression.getCompressionKind(), @@ -626,9 +623,9 @@ private static DataSize writeRcFileColumnNew(File outputFile, Format 
format, Com writer.write(new Page(blockBuilder.build())); writer.close(); - writer.validate(new FileRcFileDataSource(outputFile)); + writer.validate(new LocalInputFile(outputFile)); - return DataSize.ofBytes(output.size()); + return DataSize.ofBytes(outputFile.length()); } private static void writeValue(Type type, BlockBuilder blockBuilder, Object value) diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java index 2cc1bc53cbe7..66ee6bfedd77 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileReaderManual.java @@ -18,7 +18,7 @@ import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.airlift.units.DataSize; +import io.trino.filesystem.memory.MemoryInputFile; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; import io.trino.spi.block.Block; import org.joda.time.DateTimeZone; @@ -29,7 +29,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static io.airlift.slice.Slices.utf8Slice; -import static io.airlift.units.DataSize.Unit.MEGABYTE; import static io.trino.spi.type.SmallintType.SMALLINT; import static java.util.stream.Collectors.toList; import static org.testng.Assert.assertEquals; @@ -236,12 +235,11 @@ private static List readValues(Slice data, int offset, int length) } RcFileReader reader = new RcFileReader( - new MemoryRcFileDataSource(new RcFileDataSourceId("test"), data), + new MemoryInputFile("test", data), new BinaryRcFileEncoding(DateTimeZone.UTC), ImmutableMap.of(0, SMALLINT), offset, - length, - DataSize.of(8, MEGABYTE)); + length); ImmutableList.Builder values = ImmutableList.builder(); while (reader.advance() >= 0) { diff --git a/plugin/trino-hive/pom.xml 
b/plugin/trino-hive/pom.xml index c51de92a12f8..2511b125199e 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -260,6 +260,11 @@ alluxio-shaded-client + + org.apache.iceberg + iceberg-api + + org.apache.thrift libthrift diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java new file mode 100644 index 000000000000..2e642cba3db7 --- /dev/null +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MonitoredTrinoInputFile.java @@ -0,0 +1,184 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.hive; + +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import org.apache.iceberg.io.SeekableInputStream; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class MonitoredTrinoInputFile + implements TrinoInputFile +{ + private final FileFormatDataSourceStats stats; + private final TrinoInputFile delegate; + + public MonitoredTrinoInputFile(FileFormatDataSourceStats stats, TrinoInputFile delegate) + { + this.stats = requireNonNull(stats, "stats is null"); + this.delegate = requireNonNull(delegate, "delegate is null"); + } + + @Override + public TrinoInput newInput() + throws IOException + { + return new MonitoredTrinoInput(stats, delegate.newInput()); + } + + @Override + public long length() + throws IOException + { + return delegate.length(); + } + + @Override + public long modificationTime() + throws IOException + { + return delegate.modificationTime(); + } + + @Override + public boolean exists() + throws IOException + { + return delegate.exists(); + } + + @Override + public String location() + { + return delegate.location(); + } + + @Override + public String toString() + { + return delegate.toString(); + } + + private static final class MonitoredTrinoInput + implements TrinoInput + { + private final FileFormatDataSourceStats stats; + private final TrinoInput delegate; + + public MonitoredTrinoInput(FileFormatDataSourceStats stats, TrinoInput delegate) + { + this.stats = requireNonNull(stats, "stats is null"); + this.delegate = requireNonNull(delegate, "delegate is null"); + } + + @Override + public SeekableInputStream inputStream() + { + return new MonitoredSeekableInputStream(stats, delegate.inputStream()); + } + + @Override + public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + long readStart = System.nanoTime(); + delegate.readFully(position, buffer, bufferOffset, bufferLength); + 
stats.readDataBytesPerSecond(bufferLength, System.nanoTime() - readStart); + } + + @Override + public int readTail(byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + long readStart = System.nanoTime(); + int size = delegate.readTail(buffer, bufferOffset, bufferLength); + stats.readDataBytesPerSecond(size, System.nanoTime() - readStart); + return size; + } + + @Override + public void close() + throws IOException + { + delegate.close(); + } + } + + private static final class MonitoredSeekableInputStream + extends SeekableInputStream + { + private final FileFormatDataSourceStats stats; + private final SeekableInputStream delegate; + + public MonitoredSeekableInputStream(FileFormatDataSourceStats stats, SeekableInputStream delegate) + { + this.stats = requireNonNull(stats, "stats is null"); + this.delegate = requireNonNull(delegate, "delegate is null"); + } + + @Override + public long getPos() + throws IOException + { + return delegate.getPos(); + } + + @Override + public void seek(long newPos) + throws IOException + { + delegate.seek(newPos); + } + + @Override + public int read() + throws IOException + { + long readStart = System.nanoTime(); + int value = delegate.read(); + stats.readDataBytesPerSecond(1, System.nanoTime() - readStart); + return value; + } + + @Override + public int read(byte[] b, int off, int len) + throws IOException + { + long readStart = System.nanoTime(); + int size = delegate.read(b, off, len); + stats.readDataBytesPerSecond(size, System.nanoTime() - readStart); + return size; + } + + @Override + public long skip(long n) + throws IOException + { + long readStart = System.nanoTime(); + long size = delegate.skip(n); + stats.readDataBytesPerSecond(size, System.nanoTime() - readStart); + return size; + } + + @Override + public void close() + throws IOException + { + delegate.close(); + } + } +} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java index 4571beae0740..7f27b70b8204 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriter.java @@ -15,11 +15,11 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.CountingOutputStream; -import io.airlift.slice.OutputStreamSliceOutput; +import io.trino.filesystem.TrinoInputFile; import io.trino.hive.formats.compression.CompressionKind; -import io.trino.hive.formats.rcfile.RcFileDataSource; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileWriter; +import io.trino.memory.context.AggregatedMemoryContext; import io.trino.spi.Page; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; @@ -53,28 +53,31 @@ public class RcFileFileWriter private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean(); private final CountingOutputStream outputStream; + private final AggregatedMemoryContext outputStreamMemoryContext; private final RcFileWriter rcFileWriter; private final Closeable rollbackAction; private final int[] fileInputColumnIndexes; private final List nullBlocks; - private final Optional> validationInputFactory; + private final Optional> validationInputFactory; private long validationCpuNanos; public RcFileFileWriter( OutputStream outputStream, + AggregatedMemoryContext outputStreamMemoryContext, Closeable rollbackAction, RcFileEncoding rcFileEncoding, List fileColumnTypes, Optional compressionKind, int[] fileInputColumnIndexes, Map metadata, - Optional> validationInputFactory) + Optional> validationInputFactory) throws IOException { this.outputStream = new CountingOutputStream(outputStream); + this.outputStreamMemoryContext = outputStreamMemoryContext; rcFileWriter = new RcFileWriter( - new OutputStreamSliceOutput(this.outputStream), + this.outputStream, fileColumnTypes, 
rcFileEncoding, compressionKind, @@ -103,7 +106,7 @@ public long getWrittenBytes() @Override public long getMemoryUsage() { - return INSTANCE_SIZE + rcFileWriter.getRetainedSizeInBytes(); + return INSTANCE_SIZE + rcFileWriter.getRetainedSizeInBytes() + outputStreamMemoryContext.getBytes(); } @Override @@ -146,11 +149,10 @@ public Closeable commit() if (validationInputFactory.isPresent()) { try { - try (RcFileDataSource input = validationInputFactory.get().get()) { - long startThreadCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime(); - rcFileWriter.validate(input); - validationCpuNanos += THREAD_MX_BEAN.getCurrentThreadCpuTime() - startThreadCpuTime; - } + TrinoInputFile inputFile = validationInputFactory.get().get(); + long startThreadCpuTime = THREAD_MX_BEAN.getCurrentThreadCpuTime(); + rcFileWriter.validate(inputFile); + validationCpuNanos += THREAD_MX_BEAN.getCurrentThreadCpuTime() - startThreadCpuTime; } catch (IOException | UncheckedIOException e) { throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java index aae254beb724..9dcc2c7a890a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java @@ -14,19 +14,20 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableMap; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; import io.trino.hive.formats.compression.CompressionKind; -import io.trino.hive.formats.rcfile.RcFileDataSource; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; +import io.trino.memory.context.AggregatedMemoryContext; import 
io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.rcfile.HdfsRcFileDataSource; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; @@ -35,7 +36,6 @@ import javax.inject.Inject; import java.io.Closeable; -import java.io.IOException; import java.io.OutputStream; import java.util.List; import java.util.Optional; @@ -43,8 +43,8 @@ import java.util.Properties; import java.util.function.Supplier; +import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED; import static io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME; import static io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME; import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision; @@ -65,31 +65,27 @@ public class RcFileFileWriterFactory private final HdfsEnvironment hdfsEnvironment; private final TypeManager typeManager; private final NodeVersion nodeVersion; - private final FileFormatDataSourceStats stats; @Inject public RcFileFileWriterFactory( HdfsEnvironment hdfsEnvironment, TypeManager typeManager, NodeVersion nodeVersion, - HiveConfig hiveConfig, - FileFormatDataSourceStats stats) + HiveConfig hiveConfig) { - this(hdfsEnvironment, typeManager, nodeVersion, hiveConfig.getRcfileDateTimeZone(), stats); + this(hdfsEnvironment, typeManager, nodeVersion, hiveConfig.getRcfileDateTimeZone()); } public RcFileFileWriterFactory( HdfsEnvironment hdfsEnvironment, TypeManager typeManager, NodeVersion nodeVersion, - DateTimeZone timeZone, - 
FileFormatDataSourceStats stats) + DateTimeZone timeZone) { this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null"); this.timeZone = requireNonNull(timeZone, "timeZone is null"); - this.stats = requireNonNull(stats, "stats is null"); } @Override @@ -135,29 +131,20 @@ else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) { .toArray(); try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration); - OutputStream outputStream = fileSystem.create(path, false); + TrinoFileSystem fileSystem = new HdfsFileSystemFactory(hdfsEnvironment).create(session.getIdentity()); + AggregatedMemoryContext outputStreamMemoryContext = newSimpleAggregatedMemoryContext(); + OutputStream outputStream = fileSystem.newOutputFile(path.toString()).create(outputStreamMemoryContext); - Optional> validationInputFactory = Optional.empty(); + Optional> validationInputFactory = Optional.empty(); if (isRcfileOptimizedWriterValidate(session)) { - validationInputFactory = Optional.of(() -> { - try { - return new HdfsRcFileDataSource( - path.toString(), - fileSystem.open(path), - fileSystem.getFileStatus(path).getLen(), - stats); - } - catch (IOException e) { - throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e); - } - }); + validationInputFactory = Optional.of(() -> fileSystem.newInputFile(path.toString())); } - Closeable rollbackAction = () -> fileSystem.delete(path, false); + Closeable rollbackAction = () -> fileSystem.deleteFile(path.toString()); return Optional.of(new RcFileFileWriter( outputStream, + outputStreamMemoryContext, rollbackAction, rcFileEncoding, fileColumnTypes, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java deleted file 
mode 100644 index 822cef0cddcf..000000000000 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/HdfsRcFileDataSource.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.rcfile; - -import io.trino.hive.formats.rcfile.RcFileDataSource; -import io.trino.hive.formats.rcfile.RcFileDataSourceId; -import io.trino.plugin.hive.FileFormatDataSourceStats; -import org.apache.hadoop.fs.FSDataInputStream; - -import java.io.IOException; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.util.Objects.requireNonNull; - -public class HdfsRcFileDataSource - implements RcFileDataSource -{ - private final FSDataInputStream inputStream; - private final String path; - private final long size; - private final FileFormatDataSourceStats stats; - private long readTimeNanos; - private long readBytes; - - public HdfsRcFileDataSource(String path, FSDataInputStream inputStream, long size, FileFormatDataSourceStats stats) - { - this.path = requireNonNull(path, "path is null"); - this.inputStream = requireNonNull(inputStream, "inputStream is null"); - this.size = size; - checkArgument(size >= 0, "size is negative"); - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public RcFileDataSourceId getId() - { - return new RcFileDataSourceId(path); - } - - @Override - public void close() - throws IOException - { - inputStream.close(); - } - - @Override - 
public long getReadBytes() - { - return readBytes; - } - - @Override - public long getReadTimeNanos() - { - return readTimeNanos; - } - - @Override - public long getSize() - { - return size; - } - - @Override - public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) - throws IOException - { - long start = System.nanoTime(); - - inputStream.readFully(position, buffer, bufferOffset, bufferLength); - - long readDuration = System.nanoTime() - start; - stats.readDataBytesPerSecond(bufferLength, readDuration); - - readTimeNanos += readDuration; - readBytes += bufferLength; - } - - @Override - public String toString() - { - return path; - } -} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java index 48bc5309f174..192a71fce9d6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSource.java @@ -144,11 +144,11 @@ public Page getNextPage() } catch (RcFileCorruptionException e) { closeAllSuppress(e, this); - throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e); + throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getFileLocation()), e); } catch (IOException | RuntimeException e) { closeAllSuppress(e, this); - throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e); + throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getFileLocation()), e); } } @@ -212,10 +212,10 @@ public Block load() block = rcFileReader.readBlock(columnIndex); } catch (RcFileCorruptionException e) { - throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e); + throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC 
file: %s", rcFileReader.getFileLocation()), e); } catch (IOException | RuntimeException e) { - throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e); + throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getFileLocation()), e); } loaded = true; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 37ed5be77d27..162a8769b0ab 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -18,12 +18,13 @@ import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.DataSize.Unit; -import io.trino.hdfs.FSDataInputStreamTail; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.filesystem.memory.MemoryInputFile; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hive.formats.rcfile.MemoryRcFileDataSource; import io.trino.hive.formats.rcfile.RcFileCorruptionException; -import io.trino.hive.formats.rcfile.RcFileDataSource; -import io.trino.hive.formats.rcfile.RcFileDataSourceId; import io.trino.hive.formats.rcfile.RcFileEncoding; import io.trino.hive.formats.rcfile.RcFileReader; import io.trino.hive.formats.rcfile.binary.BinaryRcFileEncoding; @@ -34,6 +35,7 @@ import io.trino.plugin.hive.HiveConfig; import io.trino.plugin.hive.HivePageSourceFactory; import io.trino.plugin.hive.HiveTimestampPrecision; +import io.trino.plugin.hive.MonitoredTrinoInputFile; import io.trino.plugin.hive.ReaderColumns; import io.trino.plugin.hive.ReaderPageSource; import io.trino.plugin.hive.acid.AcidTransaction; @@ -45,8 +47,6 @@ import 
io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.BlockMissingException; import org.joda.time.DateTimeZone; @@ -54,7 +54,7 @@ import javax.inject.Inject; import java.io.FileNotFoundException; -import java.io.IOException; +import java.io.InputStream; import java.util.List; import java.util.Optional; import java.util.OptionalInt; @@ -83,7 +83,6 @@ import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_NULL_FORMAT; import static java.lang.Math.min; -import static java.lang.Math.toIntExact; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS; @@ -159,24 +158,22 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) { .collect(toImmutableList()); } - RcFileDataSource dataSource; + TrinoFileSystem trinoFileSystem = new HdfsFileSystemFactory(hdfsEnvironment).create(session.getIdentity()); + TrinoInputFile inputFile = new MonitoredTrinoInputFile(stats, trinoFileSystem.newInputFile(path.toString())); try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration); - FSDataInputStream inputStream = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(path)); + length = min(inputFile.length() - start, length); + if (!inputFile.exists()) { + throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, "File does not exist"); + } if (estimatedFileSize < BUFFER_SIZE.toBytes()) { - // Handle potentially imprecise file lengths by reading the footer - try { - FSDataInputStreamTail fileTail = FSDataInputStreamTail.readTail(path.toString(), estimatedFileSize, inputStream, 
toIntExact(BUFFER_SIZE.toBytes())); - dataSource = new MemoryRcFileDataSource(new RcFileDataSourceId(path.toString()), fileTail.getTailSlice()); - } - finally { - inputStream.close(); + try (TrinoInput input = inputFile.newInput(); InputStream inputStream = input.inputStream()) { + byte[] data = inputStream.readAllBytes(); + inputFile = new MemoryInputFile(path.toString(), Slices.wrappedBuffer(data)); } } - else { - long fileSize = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(path).getLen()); - dataSource = new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats); - } + } + catch (TrinoException e) { + throw e; } catch (Exception e) { if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || @@ -186,7 +183,6 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) { throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e); } - length = min(dataSource.getSize() - start, length); // Split may be empty now that the correct file size is known if (length <= 0) { return Optional.of(noProjectionAdaptation(new EmptyPageSource())); @@ -200,22 +196,16 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) { } RcFileReader rcFileReader = new RcFileReader( - dataSource, + inputFile, rcFileEncoding, readColumns.buildOrThrow(), start, - length, - BUFFER_SIZE); + length); ConnectorPageSource pageSource = new RcFilePageSource(rcFileReader, projectedReaderColumns); return Optional.of(new ReaderPageSource(pageSource, readerProjections)); } catch (Throwable e) { - try { - dataSource.close(); - } - catch (IOException ignored) { - } if (e instanceof TrinoException) { throw (TrinoException) e; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java index 7ac8b58e1eae..4b84368c4f6e 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java +++ 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveTestUtils.java @@ -203,7 +203,7 @@ public static Set getDefaultHiveRecordCursorProviders( public static Set getDefaultHiveFileWriterFactories(HiveConfig hiveConfig, HdfsEnvironment hdfsEnvironment) { return ImmutableSet.builder() - .add(new RcFileFileWriterFactory(hdfsEnvironment, TESTING_TYPE_MANAGER, new NodeVersion("test_version"), hiveConfig, new FileFormatDataSourceStats())) + .add(new RcFileFileWriterFactory(hdfsEnvironment, TESTING_TYPE_MANAGER, new NodeVersion("test_version"), hiveConfig)) .add(getDefaultOrcFileWriterFactory(hdfsEnvironment)) .build(); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index 0c56502b29f4..95035c2660df 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -249,7 +249,7 @@ public void testRcTextOptimizedWriter(int rowCount) assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) - .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) + .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE)) .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig())); } @@ -290,7 +290,7 @@ public void testRcBinaryOptimizedWriter(int rowCount) assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) - .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) + .withFileWriterFactory(new 
RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE)) .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig())) .withColumns(testColumnsNoTimestamps) .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); @@ -774,7 +774,7 @@ public void testRCBinaryProjectedColumns(int rowCount) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withRowsCount(rowCount) - .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) + .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE)) .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig())); } @@ -802,7 +802,7 @@ public void testRCBinaryProjectedColumnsPageSource(int rowCount) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withRowsCount(rowCount) - .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) + .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE)) .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig())); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java index f7bb730dff6b..25dab4c05fb2 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/benchmark/StandardFileFormats.java @@ -14,7 +14,6 @@ package io.trino.plugin.hive.benchmark; import 
com.google.common.collect.ImmutableMap; -import io.airlift.slice.OutputStreamSliceOutput; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; import io.trino.hive.formats.rcfile.RcFileEncoding; @@ -384,7 +383,7 @@ public PrestoRcFileFormatWriter(File targetFile, List types, RcFileEncodin throws IOException { writer = new RcFileWriter( - new OutputStreamSliceOutput(new FileOutputStream(targetFile)), + new FileOutputStream(targetFile), types, encoding, compressionCodec.getHiveCompressionKind(), From 7a6763386e2539baa5467c1db43bf49dbe1f1438 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sun, 28 Aug 2022 21:00:06 -0700 Subject: [PATCH 10/10] Rename and move RcFileDecoderUtils for reuse --- ...eDecoderUtils.java => ReadWriteUtils.java} | 10 +++------- .../hive/formats/rcfile/RcFileReader.java | 15 +++++++------- .../hive/formats/rcfile/RcFileWriter.java | 4 ++-- .../formats/rcfile/binary/BinaryEncoding.java | 6 +++--- .../formats/rcfile/binary/DateEncoding.java | 6 +++--- .../rcfile/binary/DecimalEncoding.java | 6 +++--- .../formats/rcfile/binary/ListEncoding.java | 8 ++++---- .../formats/rcfile/binary/LongEncoding.java | 12 +++++------ .../formats/rcfile/binary/MapEncoding.java | 8 ++++---- .../formats/rcfile/binary/StringEncoding.java | 8 ++++---- .../rcfile/binary/TimestampEncoding.java | 20 +++++++++---------- .../formats/rcfile/text/StringEncoding.java | 2 +- ...oderUtils.java => TestReadWriteUtils.java} | 12 +++++------ .../hive/formats/rcfile/RcFileTester.java | 2 +- 14 files changed, 58 insertions(+), 61 deletions(-) rename lib/trino-hive-formats/src/main/java/io/trino/hive/formats/{rcfile/RcFileDecoderUtils.java => ReadWriteUtils.java} (97%) rename lib/trino-hive-formats/src/test/java/io/trino/hive/formats/{rcfile/TestRcFileDecoderUtils.java => TestReadWriteUtils.java} (89%) diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java similarity index 97% rename from lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java rename to lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java index e044e658ce3b..7d9a73619a49 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileDecoderUtils.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/ReadWriteUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats; import io.airlift.slice.Slice; import io.airlift.slice.SliceInput; @@ -19,8 +19,6 @@ import io.airlift.slice.Slices; import io.trino.filesystem.TrinoInput; import io.trino.filesystem.TrinoInputFile; -import io.trino.hive.formats.DataOutputStream; -import io.trino.hive.formats.DataSeekableInputStream; import io.trino.spi.type.CharType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; @@ -36,14 +34,12 @@ import static java.util.Objects.requireNonNull; // faster versions of org.apache.hadoop.io.WritableUtils methods adapted for Slice -public final class RcFileDecoderUtils +public final class ReadWriteUtils { // 0xFFFF_FFFF + syncFirst(long) + syncSecond(long) private static final int SYNC_SEQUENCE_LENGTH = SIZE_OF_INT + SIZE_OF_LONG + SIZE_OF_LONG; - private RcFileDecoderUtils() - { - } + private ReadWriteUtils() {} public static int decodeVIntSize(Slice slice, int offset) { diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java index 627bccb1c7c8..bd4d2c9ec7fe 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java +++ 
b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileReader.java @@ -19,6 +19,7 @@ import io.airlift.slice.Slices; import io.trino.filesystem.TrinoInputFile; import io.trino.hive.formats.DataSeekableInputStream; +import io.trino.hive.formats.ReadWriteUtils; import io.trino.hive.formats.compression.CompressionKind; import io.trino.hive.formats.compression.ValueDecompressor; import io.trino.hive.formats.rcfile.RcFileWriteValidation.WriteChecksum; @@ -221,7 +222,7 @@ else if (SEQUENCE_FILE_MAGIC.equals(magic)) { // of the file. In that case, the reader owns all row groups up to the first sync point. if (offset != 0) { // if the specified file region does not contain the start of a sync sequence, this call will close the reader - long startOfSyncSequence = RcFileDecoderUtils.findFirstSyncPosition(inputFile, offset, length, syncFirst, syncSecond); + long startOfSyncSequence = ReadWriteUtils.findFirstSyncPosition(inputFile, offset, length, syncFirst, syncSecond); if (startOfSyncSequence < 0) { closeQuietly(); return; @@ -384,7 +385,7 @@ else if (rowsRead > 0) { BasicSliceInput headerInput = header.getInput(); // read number of rows in row group - rowGroupRowCount = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); + rowGroupRowCount = toIntExact(ReadWriteUtils.readVInt(headerInput)); rowsRead += rowGroupRowCount; rowGroupPosition = 0; currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount); @@ -392,14 +393,14 @@ else if (rowsRead > 0) { // set column buffers int totalCompressedDataSize = 0; for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) { - int compressedDataSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); + int compressedDataSize = toIntExact(ReadWriteUtils.readVInt(headerInput)); totalCompressedDataSize += compressedDataSize; - int uncompressedDataSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); + int uncompressedDataSize = toIntExact(ReadWriteUtils.readVInt(headerInput)); if (decompressor 
== null && compressedDataSize != uncompressedDataSize) { throw corrupt("Invalid RCFile %s", location); } - int lengthsSize = toIntExact(RcFileDecoderUtils.readVInt(headerInput)); + int lengthsSize = toIntExact(ReadWriteUtils.readVInt(headerInput)); Slice lengthsBuffer = headerInput.readSlice(lengthsSize); @@ -452,7 +453,7 @@ private void closeQuietly() private Slice readLengthPrefixedString(DataSeekableInputStream in) throws IOException { - int length = toIntExact(RcFileDecoderUtils.readVInt(in)); + int length = toIntExact(ReadWriteUtils.readVInt(in)); verify(length <= MAX_METADATA_STRING_LENGTH, "Metadata string value is too long (%s) in RCFile %s", length, in); return in.readSlice(length); } @@ -615,7 +616,7 @@ private int readNextValueLength() return lastValueLength; } - int valueLength = toIntExact(RcFileDecoderUtils.readVInt(lengthsInput)); + int valueLength = toIntExact(ReadWriteUtils.readVInt(lengthsInput)); // negative length is used to encode a run or the last value if (valueLength < 0) { diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java index 3e93e68784cc..d3369063358b 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/RcFileWriter.java @@ -46,8 +46,8 @@ import static io.airlift.slice.Slices.utf8Slice; import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.airlift.units.DataSize.Unit.MEGABYTE; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeLengthPrefixedString; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.ReadWriteUtils.writeLengthPrefixedString; +import static io.trino.hive.formats.ReadWriteUtils.writeVInt; import static io.trino.hive.formats.rcfile.RcFileReader.validateFile; import static 
java.lang.StrictMath.toIntExact; import static java.util.Objects.requireNonNull; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java index 1d882e344df2..7cd095afd296 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/BinaryEncoding.java @@ -21,9 +21,9 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize; +import static io.trino.hive.formats.ReadWriteUtils.readVInt; +import static io.trino.hive.formats.ReadWriteUtils.writeVInt; import static java.lang.Math.toIntExact; public class BinaryEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java index 37adf8f72d29..3b872c313d9d 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DateEncoding.java @@ -21,9 +21,9 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize; +import static io.trino.hive.formats.ReadWriteUtils.readVInt; +import static 
io.trino.hive.formats.ReadWriteUtils.writeVInt; import static java.lang.Math.toIntExact; public class DateEncoding diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java index 2e0671cb48ab..cf2eddeaad7a 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/DecimalEncoding.java @@ -28,9 +28,9 @@ import java.math.BigInteger; import static com.google.common.base.Preconditions.checkState; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize; +import static io.trino.hive.formats.ReadWriteUtils.readVInt; +import static io.trino.hive.formats.ReadWriteUtils.writeVInt; import static io.trino.spi.type.Decimals.rescale; import static java.lang.Math.toIntExact; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java index ecf16a1cc62c..ca455c4d0ae7 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/ListEncoding.java @@ -15,7 +15,7 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.hive.formats.rcfile.RcFileDecoderUtils; +import io.trino.hive.formats.ReadWriteUtils; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; @@ -37,7 +37,7 @@ public ListEncoding(Type type, BinaryColumnEncoding elementEncoding) public 
void encodeValue(Block block, int position, SliceOutput output) { Block list = block.getObject(position, Block.class); - RcFileDecoderUtils.writeVInt(output, list.getPositionCount()); + ReadWriteUtils.writeVInt(output, list.getPositionCount()); // write null bits int nullByte = 0; @@ -64,8 +64,8 @@ public void encodeValue(Block block, int position, SliceOutput output) public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { // entries in list - int entries = toIntExact(RcFileDecoderUtils.readVInt(slice, offset)); - offset += RcFileDecoderUtils.decodeVIntSize(slice.getByte(offset)); + int entries = toIntExact(ReadWriteUtils.readVInt(slice, offset)); + offset += ReadWriteUtils.decodeVIntSize(slice.getByte(offset)); // null bytes int nullByteCur = offset; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java index d621fd5b6669..c8f60434526f 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/LongEncoding.java @@ -15,9 +15,9 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.ReadWriteUtils; import io.trino.hive.formats.rcfile.ColumnData; import io.trino.hive.formats.rcfile.EncodeOutput; -import io.trino.hive.formats.rcfile.RcFileDecoderUtils; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; @@ -37,7 +37,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut { for (int position = 0; position < block.getPositionCount(); position++) { if (!block.isNull(position)) { - RcFileDecoderUtils.writeVLong(output, type.getLong(block, position)); + ReadWriteUtils.writeVLong(output, type.getLong(block, position)); } encodeOutput.closeEntry(); } @@ 
-46,7 +46,7 @@ public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOut @Override public void encodeValueInto(Block block, int position, SliceOutput output) { - RcFileDecoderUtils.writeVLong(output, type.getLong(block, position)); + ReadWriteUtils.writeVLong(output, type.getLong(block, position)); } @Override @@ -63,7 +63,7 @@ public Block decodeColumn(ColumnData columnData) builder.appendNull(); } else { - type.writeLong(builder, RcFileDecoderUtils.readVInt(slice, offset, length)); + type.writeLong(builder, ReadWriteUtils.readVInt(slice, offset, length)); } } return builder.build(); @@ -78,12 +78,12 @@ public int getValueOffset(Slice slice, int offset) @Override public int getValueLength(Slice slice, int offset) { - return RcFileDecoderUtils.decodeVIntSize(slice, offset); + return ReadWriteUtils.decodeVIntSize(slice, offset); } @Override public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { - type.writeLong(builder, RcFileDecoderUtils.readVInt(slice, offset, length)); + type.writeLong(builder, ReadWriteUtils.readVInt(slice, offset, length)); } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java index 1162a1a3025a..6886db230e5c 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/MapEncoding.java @@ -15,7 +15,7 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; -import io.trino.hive.formats.rcfile.RcFileDecoderUtils; +import io.trino.hive.formats.ReadWriteUtils; import io.trino.spi.StandardErrorCode; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; @@ -43,7 +43,7 @@ public void encodeValue(Block block, int position, SliceOutput output) Block map = block.getObject(position, Block.class); // write 
entry count - RcFileDecoderUtils.writeVInt(output, map.getPositionCount() / 2); + ReadWriteUtils.writeVInt(output, map.getPositionCount() / 2); // write null bits int nullByte = 0b0101_0101; @@ -84,8 +84,8 @@ public void encodeValue(Block block, int position, SliceOutput output) public void decodeValueInto(BlockBuilder builder, Slice slice, int offset, int length) { // entries in list - int entries = toIntExact(RcFileDecoderUtils.readVInt(slice, offset)); - offset += RcFileDecoderUtils.decodeVIntSize(slice.getByte(offset)); + int entries = toIntExact(ReadWriteUtils.readVInt(slice, offset)); + offset += ReadWriteUtils.decodeVIntSize(slice.getByte(offset)); // null bytes int nullByteCur = offset; diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java index ac5c335685a0..eebf717aa4b9 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/StringEncoding.java @@ -22,10 +22,10 @@ import io.trino.spi.type.Type; import static io.airlift.slice.Slices.EMPTY_SLICE; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.calculateTruncationLength; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.decodeVIntSize; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.readVInt; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.writeVInt; +import static io.trino.hive.formats.ReadWriteUtils.calculateTruncationLength; +import static io.trino.hive.formats.ReadWriteUtils.decodeVIntSize; +import static io.trino.hive.formats.ReadWriteUtils.readVInt; +import static io.trino.hive.formats.ReadWriteUtils.writeVInt; import static java.lang.Math.toIntExact; public class StringEncoding diff --git 
a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java index 924aebb2fc0a..89d7b3fb6de4 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/binary/TimestampEncoding.java @@ -15,9 +15,9 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; +import io.trino.hive.formats.ReadWriteUtils; import io.trino.hive.formats.rcfile.ColumnData; import io.trino.hive.formats.rcfile.EncodeOutput; -import io.trino.hive.formats.rcfile.RcFileDecoderUtils; import io.trino.hive.formats.rcfile.TimestampHolder; import io.trino.plugin.base.type.DecodedTimestamp; import io.trino.plugin.base.type.TrinoTimestampEncoder; @@ -97,12 +97,12 @@ public int getValueLength(Slice slice, int offset) { int length = 4; if (hasNanosVInt(slice.getByte(offset))) { - int nanosVintLength = RcFileDecoderUtils.decodeVIntSize(slice, offset + 4); + int nanosVintLength = ReadWriteUtils.decodeVIntSize(slice, offset + 4); length += nanosVintLength; // is there extra data for "seconds" - if (RcFileDecoderUtils.isNegativeVInt(slice, offset + 4)) { - length += RcFileDecoderUtils.decodeVIntSize(slice, offset + 4 + nanosVintLength); + if (ReadWriteUtils.isNegativeVInt(slice, offset + 4)) { + length += ReadWriteUtils.decodeVIntSize(slice, offset + 4 + nanosVintLength); } } return length; @@ -133,15 +133,15 @@ private static DecodedTimestamp getTimestamp(Slice slice, int offset) // this is an inline version of readVint, so it can be stitched together // the code to read the seconds high bits below byte nanosFirstByte = slice.getByte(offset); - int nanosLength = RcFileDecoderUtils.decodeVIntSize(nanosFirstByte); - nanos = (int) RcFileDecoderUtils.readVInt(slice, offset, nanosLength); + int nanosLength = 
ReadWriteUtils.decodeVIntSize(nanosFirstByte); + nanos = (int) ReadWriteUtils.readVInt(slice, offset, nanosLength); nanos = decodeNanos(nanos); // read seconds (high 32 bits) - if (RcFileDecoderUtils.isNegativeVInt(nanosFirstByte)) { + if (ReadWriteUtils.isNegativeVInt(nanosFirstByte)) { // We compose the seconds field from two parts. The lowest 31 bits come from the first four // bytes. The higher-order bits come from the second VInt that follows the nanos field. - long highBits = RcFileDecoderUtils.readVInt(slice, offset + nanosLength); + long highBits = ReadWriteUtils.readVInt(slice, offset + nanosLength); seconds |= (highBits << 31); } } @@ -205,12 +205,12 @@ private static void writeTimestamp(long seconds, int nanos, SliceOutput output) if (hasSecondsHigh32 || nanosReversed != 0) { // The sign of the reversed-nanoseconds field indicates that there is a second VInt present int value = hasSecondsHigh32 ? ~nanosReversed : nanosReversed; - RcFileDecoderUtils.writeVInt(output, value); + ReadWriteUtils.writeVInt(output, value); } if (hasSecondsHigh32) { int secondsHigh32 = (int) (seconds >> 31); - RcFileDecoderUtils.writeVInt(output, secondsHigh32); + ReadWriteUtils.writeVInt(output, secondsHigh32); } } diff --git a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java index 76486ba7fdf7..3fc5fa753638 100644 --- a/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java +++ b/lib/trino-hive-formats/src/main/java/io/trino/hive/formats/rcfile/text/StringEncoding.java @@ -22,7 +22,7 @@ import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.Type; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.calculateTruncationLength; +import static io.trino.hive.formats.ReadWriteUtils.calculateTruncationLength; public class StringEncoding implements TextColumnEncoding diff --git 
a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java similarity index 89% rename from lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java rename to lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java index 876d9cd17d21..153c3d2ee536 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/TestRcFileDecoderUtils.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/TestReadWriteUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.hive.formats.rcfile; +package io.trino.hive.formats; import io.airlift.slice.Slice; import io.airlift.slice.SliceOutput; @@ -23,7 +23,7 @@ import static org.testng.Assert.assertEquals; -public class TestRcFileDecoderUtils +public class TestReadWriteUtils { @Test public void testVInt() @@ -58,10 +58,10 @@ private static void assertVIntRoundTrip(SliceOutput output, long value) long readValueOld = WritableUtils.readVLong(oldBytes.getInput()); assertEquals(readValueOld, value); - long readValueNew = RcFileDecoderUtils.readVInt(oldBytes, 0); + long readValueNew = ReadWriteUtils.readVInt(oldBytes, 0); assertEquals(readValueNew, value); - long readValueNewStream = RcFileDecoderUtils.readVInt(oldBytes.getInput()); + long readValueNewStream = ReadWriteUtils.readVInt(oldBytes.getInput()); assertEquals(readValueNewStream, value); } @@ -73,7 +73,7 @@ private static Slice writeVintOld(SliceOutput output, long value) Slice vLongOld = Slices.copyOf(output.slice()); output.reset(); - RcFileDecoderUtils.writeVLong(output, value); + ReadWriteUtils.writeVLong(output, value); Slice vLongNew = Slices.copyOf(output.slice()); assertEquals(vLongNew, vLongOld); @@ -84,7 +84,7 @@ private static Slice writeVintOld(SliceOutput output, 
long value) assertEquals(vIntOld, vLongOld); output.reset(); - RcFileDecoderUtils.writeVInt(output, (int) value); + ReadWriteUtils.writeVInt(output, (int) value); Slice vIntNew = Slices.copyOf(output.slice()); assertEquals(vIntNew, vLongOld); } diff --git a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java index ca062f00f6fd..c60224f69d43 100644 --- a/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java +++ b/lib/trino-hive-formats/src/test/java/io/trino/hive/formats/rcfile/RcFileTester.java @@ -118,7 +118,7 @@ import static io.airlift.slice.SizeOf.SIZE_OF_LONG; import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration; -import static io.trino.hive.formats.rcfile.RcFileDecoderUtils.findFirstSyncPosition; +import static io.trino.hive.formats.ReadWriteUtils.findFirstSyncPosition; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.BZIP2; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.GZIP; import static io.trino.hive.formats.rcfile.RcFileTester.Compression.LZ4;