From 64fb3dbf950084da18c723e466582a154bc40966 Mon Sep 17 00:00:00 2001 From: imjalpreet Date: Wed, 30 Dec 2020 18:39:57 +0530 Subject: [PATCH] Update to Hive 3.0.0 Fix TestOrcBatchPageSourceMemoryTracking unit test failures Fork BloomFilter class from org.apache.hive.common.util.BloomFilter Fix presto-orc plugin unit test failures Co-authored-by: David Phillips --- pom.xml | 10 +- .../presto/hive/metastore/MetastoreUtil.java | 5 +- .../metastore/thrift/ThriftMetastoreUtil.java | 3 +- .../hive/metastore/TestHiveMetastoreUtil.java | 5 +- .../thrift/TestThriftHiveMetastoreUtil.java | 7 +- presto-hive/pom.xml | 45 --- .../presto/hive/GenericHiveRecordCursor.java | 1 - .../presto/hive/HiveCompressionCodec.java | 2 +- .../com/facebook/presto/hive/HiveUtil.java | 7 +- .../facebook/presto/hive/HiveWriteUtils.java | 1 - .../presto/hive/OrcFileWriterFactory.java | 7 +- .../presto/hive/ParquetFileWriterConfig.java | 2 +- .../presto/hive/ParquetRecordWriterUtil.java | 6 +- .../presto/hive/RecordFileWriter.java | 1 - .../hive/parquet/ParquetFileWriter.java | 2 +- .../parquet/ParquetFileWriterFactory.java | 4 +- .../presto/hive/rcfile/RcFilePageSource.java | 8 +- .../hive/rcfile/RcFilePageSourceFactory.java | 6 +- .../presto/hive/util/ConfigurationUtils.java | 10 +- .../hive/AbstractTestHiveFileFormats.java | 10 +- .../hive/TestHiveIntegrationSmokeTest.java | 16 + .../TestOrcBatchPageSourceMemoryTracking.java | 18 +- .../hive/TestParquetFileWriterConfig.java | 2 +- .../parquet/AbstractTestParquetReader.java | 25 +- .../presto/hive/parquet/ParquetTester.java | 22 +- .../write/MapKeyValuesSchemaConverter.java | 16 +- ...LevelArrayMapKeyValuesSchemaConverter.java | 16 +- .../SingleLevelArraySchemaConverter.java | 16 +- .../write/TestDataWritableWriteSupport.java | 8 +- .../parquet/write/TestDataWritableWriter.java | 10 +- .../write/TestMapredParquetOutputFormat.java | 4 +- .../presto/orc/TupleDomainOrcPredicate.java | 2 +- .../orc/metadata/statistics/BloomFilter.java | 324 ++++++++++++++++++ .../metadata/statistics/HiveBloomFilter.java | 1 - .../com/facebook/presto/orc/OrcTester.java | 31 +- .../presto/orc/TestCachingOrcDataSource.java | 12 +- .../presto/orc/TestOrcBloomFilters.java | 2 +- .../presto/orc/TestOrcReaderPositions.java | 13 +- presto-parquet/pom.xml | 64 ---- .../presto/parquet/ParquetTypeUtils.java | 1 + .../parquet/dictionary/BinaryDictionary.java | 2 +- .../apache/parquet/io/ColumnIOConverter.java | 6 +- .../presto/parquet/ParquetTestUtils.java | 30 +- .../facebook/presto/rcfile/RcFileTester.java | 6 +- 44 files changed, 494 insertions(+), 295 deletions(-) create mode 100644 presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/BloomFilter.java diff --git a/pom.xml b/pom.xml index 125fa4aa374d2..6f608ba90c10e 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ 19.3.0.0 1.32 2.10.6 - 1.50 + 1.51 6.10 3.8.0 1.2.3 @@ -579,7 +579,7 @@ com.facebook.presto.hive hive-apache - 1.2.2-2 + 3.0.0-2 @@ -1019,12 +1019,8 @@ org.apache.thrift libthrift - 0.9.1 + 0.9.3 - - org.apache.commons - commons-lang3 - org.apache.httpcomponents httpcore diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java index cde66e85d8b1b..42df84a754fce 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java @@ -47,6 +47,7 @@ import 
com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.statistics.ColumnStatisticType; import com.google.common.base.CharMatcher; +import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -109,7 +110,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.stream.Collectors.toList; import static org.apache.hadoop.hive.common.FileUtils.unescapePathName; -import static org.apache.hadoop.hive.metastore.MetaStoreUtils.typeToThriftType; +import static org.apache.hadoop.hive.metastore.ColumnType.typeToThriftType; import static org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME; @@ -217,7 +218,7 @@ public static Properties getHiveSchema( if (storage.getBucketProperty().isPresent()) { List bucketedBy = storage.getBucketProperty().get().getBucketedBy(); if (!bucketedBy.isEmpty()) { - schema.setProperty(BUCKET_FIELD_NAME, bucketedBy.get(0)); + schema.setProperty(BUCKET_FIELD_NAME, Joiner.on(",").join(bucketedBy)); } schema.setProperty(BUCKET_COUNT, Integer.toString(storage.getBucketProperty().get().getBucketCount())); } diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java index 2bc2ea0d5cbd5..45f096ebb94ca 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -39,6 +39,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Streams; +import com.google.common.primitives.Shorts; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -822,7 +823,7 @@ public static Date toMetastoreDate(LocalDate date) public static Decimal toMetastoreDecimal(BigDecimal decimal) { - return new Decimal(ByteBuffer.wrap(decimal.unscaledValue().toByteArray()), (short) decimal.scale()); + return new Decimal(Shorts.checkedCast(decimal.scale()), ByteBuffer.wrap(decimal.unscaledValue().toByteArray())); } private static OptionalLong toMetastoreDistinctValuesCount(OptionalLong distinctValuesCount, OptionalLong nullsCount) diff --git a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/TestHiveMetastoreUtil.java b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/TestHiveMetastoreUtil.java index bb46800209699..b25f32ef8a1b5 100644 --- a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/TestHiveMetastoreUtil.java +++ b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/TestHiveMetastoreUtil.java @@ -18,13 +18,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultimap; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; 
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.testng.annotations.Test; import java.util.List; @@ -37,6 +37,7 @@ import static com.facebook.presto.hive.HiveType.HIVE_STRING; import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema; import static com.facebook.presto.hive.metastore.MetastoreUtil.reconstructPartitionSchema; +import static org.apache.hadoop.hive.serde.serdeConstants.COLUMN_NAME_DELIMITER; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.testng.Assert.assertEquals; @@ -149,6 +150,7 @@ public void testPartitionRoundTrip() public void testHiveSchemaTable() { Properties expected = MetaStoreUtils.getTableMetadata(TEST_TABLE_WITH_UNSUPPORTED_FIELDS); + expected.remove(COLUMN_NAME_DELIMITER); Properties actual = getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA)); assertEquals(actual, expected); } @@ -157,6 +159,7 @@ public void testHiveSchemaTable() public void testHiveSchemaPartition() { Properties expected = MetaStoreUtils.getPartitionMetadata(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS, TEST_TABLE_WITH_UNSUPPORTED_FIELDS); + expected.remove(COLUMN_NAME_DELIMITER); Properties actual = getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS, TEST_PARTITION_VERSION_FETCHER), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA)); assertEquals(actual, expected); } diff --git a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java index 3a7c097d2204a..b2f9f362d97f3 100644 --- a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java +++ b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; -import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -34,7 +33,6 @@ import org.testng.annotations.Test; import java.math.BigDecimal; -import java.nio.ByteBuffer; import java.time.LocalDate; import java.util.Optional; import java.util.OptionalDouble; @@ -43,6 +41,7 @@ import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiColumnStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreDecimal; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.binaryStats; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.booleanStats; import static 
org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.dateStats; @@ -146,9 +145,9 @@ public void testDecimalStatsToColumnStatistics() { DecimalColumnStatsData decimalColumnStatsData = new DecimalColumnStatsData(); BigDecimal low = new BigDecimal("0"); - decimalColumnStatsData.setLowValue(new Decimal(ByteBuffer.wrap(low.unscaledValue().toByteArray()), (short) low.scale())); + decimalColumnStatsData.setLowValue(toMetastoreDecimal(low)); BigDecimal high = new BigDecimal("100"); - decimalColumnStatsData.setHighValue(new Decimal(ByteBuffer.wrap(high.unscaledValue().toByteArray()), (short) high.scale())); + decimalColumnStatsData.setHighValue(toMetastoreDecimal(high)); decimalColumnStatsData.setNumNulls(1); decimalColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData)); diff --git a/presto-hive/pom.xml b/presto-hive/pom.xml index 5f4ccc0a9c3d9..6210d75ba8513 100644 --- a/presto-hive/pom.xml +++ b/presto-hive/pom.xml @@ -35,12 +35,6 @@ com.facebook.presto presto-parquet - - - org.apache.parquet - parquet-encoding - - @@ -53,38 +47,6 @@ presto-cache - - org.apache.parquet - parquet-column - ${dep.parquet.version} - - - org.slf4j - slf4j-api - - - commons-codec - commons-codec - - - - - - org.apache.parquet - parquet-hadoop - ${dep.parquet.version} - - - org.slf4j - slf4j-api - - - org.xerial.snappy - snappy-java - - - - org.apache.hudi hudi-hadoop-mr @@ -291,13 +253,6 @@ runtime - - - org.apache.parquet - parquet-common - ${dep.parquet.version} - - com.facebook.presto diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java b/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java index 830e0ba936443..cd9f0344622ef 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java @@ -93,7 +93,6 @@ class GenericHiveRecordCursor private final K key; private final V value; - @SuppressWarnings("deprecation") private final Deserializer deserializer; private final Type[] types; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveCompressionCodec.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveCompressionCodec.java index b760d2986bb7f..020007c885382 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveCompressionCodec.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveCompressionCodec.java @@ -17,7 +17,7 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.SnappyCodec; -import parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import java.util.Optional; import java.util.function.Predicate; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java index 88d27cfd5a3d7..e10c48be3a23b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java @@ -404,7 +404,7 @@ public static boolean isSplittable(InputFormat inputFormat, FileSystem fil } } - public static StructObjectInspector getTableObjectInspector(@SuppressWarnings("deprecation") Deserializer deserializer) + public static StructObjectInspector getTableObjectInspector(Deserializer deserializer) { 
try { ObjectInspector inspector = deserializer.getObjectInspector(); @@ -428,7 +428,6 @@ public static String getDeserializerClassName(Properties schema) return name; } - @SuppressWarnings("deprecation") public static Deserializer getDeserializer(Configuration configuration, Properties schema) { String name = getDeserializerClassName(schema); @@ -438,7 +437,6 @@ public static Deserializer getDeserializer(Configuration configuration, Properti return deserializer; } - @SuppressWarnings("deprecation") private static Class getDeserializerClass(String name) { // CDH uses different names for Parquet @@ -461,7 +459,6 @@ private static Class getDeserializerClass(String name) } } - @SuppressWarnings("deprecation") private static Deserializer createDeserializer(Class clazz) { try { @@ -472,7 +469,6 @@ private static Deserializer createDeserializer(Class cla } } - @SuppressWarnings("deprecation") private static void initializeDeserializer(Configuration configuration, Deserializer deserializer, Properties schema) { try { @@ -485,7 +481,6 @@ private static void initializeDeserializer(Configuration configuration, Deserial } } - @SuppressWarnings("deprecation") private static void validate(Deserializer deserializer) { if (deserializer instanceof AbstractSerDe && !((AbstractSerDe) deserializer).getConfigurationErrors().isEmpty()) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java index 802e550f7a91d..243a7b4bb0f83 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java @@ -217,7 +217,6 @@ public void close(boolean abort) }; } - @SuppressWarnings("deprecation") public static Serializer initializeSerializer(Configuration conf, Properties properties, String serializerName) { try { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriterFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriterFactory.java index 67ace92564876..55bde3a71d3a7 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriterFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriterFactory.java @@ -39,9 +39,9 @@ import io.airlift.slice.Slice; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.orc.OrcFile.OrcTableProperties; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.mapred.JobConf; +import org.apache.orc.OrcConf; import org.joda.time.DateTimeZone; import org.weakref.jmx.Flatten; import org.weakref.jmx.Managed; @@ -292,10 +292,7 @@ public DataSink createDataSink(ConnectorSession session, FileSystem fileSystem, private static CompressionKind getCompression(Properties schema, JobConf configuration, OrcEncoding orcEncoding) { - String compressionName = schema.getProperty(OrcTableProperties.COMPRESSION.getPropName()); - if (compressionName == null) { - compressionName = configuration.get("hive.exec.orc.default.compress"); - } + String compressionName = OrcConf.COMPRESS.getString(schema, configuration); if (compressionName == null) { return CompressionKind.ZLIB; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/ParquetFileWriterConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/ParquetFileWriterConfig.java index 5c9f82750ac4c..ed2077d64a39f 100644 --- 
a/presto-hive/src/main/java/com/facebook/presto/hive/ParquetFileWriterConfig.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/ParquetFileWriterConfig.java @@ -15,7 +15,7 @@ import com.facebook.airlift.configuration.Config; import io.airlift.units.DataSize; -import parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.ParquetWriter; import static io.airlift.units.DataSize.Unit.BYTE; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/ParquetRecordWriterUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/ParquetRecordWriterUtil.java index 7750b58bd9499..05ddc120aaa43 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/ParquetRecordWriterUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/ParquetRecordWriterUtil.java @@ -23,9 +23,9 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import parquet.hadoop.ParquetFileWriter; -import parquet.hadoop.ParquetOutputFormat; -import parquet.hadoop.ParquetRecordWriter; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.apache.parquet.hadoop.ParquetRecordWriter; import java.io.IOException; import java.lang.reflect.Field; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java index b8df879b2f08e..476b6e186ce4e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java @@ -64,7 +64,6 @@ public class RecordFileWriter private final Path path; private final JobConf conf; private final int fieldCount; - @SuppressWarnings("deprecation") private final Serializer serializer; private final RecordWriter recordWriter; private final SettableStructObjectInspector tableInspector; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java index 21b5cc3286560..de8c2b2cce0a4 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java @@ -23,8 +23,8 @@ import com.facebook.presto.parquet.writer.ParquetWriterOptions; import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableList; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.openjdk.jol.info.ClassLayout; -import parquet.hadoop.metadata.CompressionCodecName; import java.io.IOException; import java.io.OutputStream; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriterFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriterFactory.java index 233b257574bf0..45f82f360c7e9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriterFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriterFactory.java @@ -31,9 +31,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; import org.apache.hadoop.mapred.JobConf; +import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.joda.time.DateTimeZone; -import parquet.hadoop.ParquetOutputFormat; -import 
parquet.hadoop.metadata.CompressionCodecName; import java.io.IOException; import java.util.List; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSource.java b/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSource.java index 1ace55fe2458b..a37427cc52fac 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSource.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSource.java @@ -29,7 +29,6 @@ import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableList; import io.airlift.units.DataSize; -import org.joda.time.DateTimeZone; import java.io.IOException; import java.util.List; @@ -60,15 +59,10 @@ public class RcFilePageSource private boolean closed; - public RcFilePageSource( - RcFileReader rcFileReader, - List columns, - DateTimeZone hiveStorageTimeZone, - TypeManager typeManager) + public RcFilePageSource(RcFileReader rcFileReader, List columns, TypeManager typeManager) { requireNonNull(rcFileReader, "rcReader is null"); requireNonNull(columns, "columns is null"); - requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); requireNonNull(typeManager, "typeManager is null"); this.rcFileReader = rcFileReader; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSourceFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSourceFactory.java index c74d6f3e5b147..4c0e7f82468a2 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSourceFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/rcfile/RcFilePageSourceFactory.java @@ -159,11 +159,7 @@ else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerD length, new DataSize(8, Unit.MEGABYTE)); - return Optional.of(new RcFilePageSource( - rcFileReader, - columns, - hiveStorageTimeZone, - typeManager)); + return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager)); } catch (Throwable e) { try { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/ConfigurationUtils.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/ConfigurationUtils.java index 12616da54d687..d7a3e0b3a7f0e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/ConfigurationUtils.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/ConfigurationUtils.java @@ -16,17 +16,16 @@ import com.facebook.presto.hadoop.FileSystemFactory; import com.facebook.presto.hive.HiveCompressionCodec; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import parquet.hadoop.ParquetOutputFormat; +import org.apache.orc.OrcConf; +import org.apache.parquet.hadoop.ParquetOutputFormat; import java.util.Map; import static com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_COMPRESSION; import static com.google.common.base.Preconditions.checkArgument; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS; import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK; public final class ConfigurationUtils @@ -100,10 +99,9 @@ private static void setCompressionProperties(Configuration config, HiveCompressi config.setBoolean("mapred.output.compress", compressed); config.setBoolean(FileOutputFormat.COMPRESS, compressed); 
// For DWRF - config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compression.getOrcCompressionKind().name()); - config.set(HIVE_ORC_COMPRESSION.varname, compression.getOrcCompressionKind().name()); + com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION, compression.getOrcCompressionKind().name()); // For ORC - config.set(OrcFile.OrcTableProperties.COMPRESSION.getPropName(), compression.getOrcCompressionKind().name()); + OrcConf.COMPRESS.setString(config, compression.getOrcCompressionKind().name()); // For RCFile and Text if (compression.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compression.getCodec().get().getName()); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java index f62f8f37ec6f2..4b9fb830c7958 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java @@ -53,7 +53,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.Serializer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; @@ -571,7 +571,7 @@ public static FileSplit createTestFile( throws Exception { HiveOutputFormat outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class); - @SuppressWarnings("deprecation") SerDe serDe = newInstance(storageFormat.getSerDe(), SerDe.class); + Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class); // filter out partition keys, which are not written to the file testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey))); @@ -579,7 +579,7 @@ public static FileSplit createTestFile( Properties tableProperties = new Properties(); tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName))); tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType))); - serDe.initialize(new Configuration(), tableProperties); + serializer.initialize(new Configuration(), tableProperties); JobConf jobConf = configureCompression(new JobConf(), compressionCodec); @@ -592,7 +592,7 @@ public static FileSplit createTestFile( () -> {}); try { - serDe.initialize(new Configuration(), tableProperties); + serializer.initialize(new Configuration(), tableProperties); SettableStructObjectInspector objectInspector = getStandardStructObjectInspector( ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), @@ -611,7 +611,7 @@ public static FileSplit createTestFile( objectInspector.setStructFieldData(row, fields.get(i), writeValue); } - Writable record = serDe.serialize(row, objectInspector); + Writable record = serializer.serialize(row, objectInspector); recordWriter.write(record); } } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index 50d1d556f5e1b..24d57a4189ee9 100644 --- 
a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -294,6 +294,22 @@ public void testIOExplain() assertUpdate("DROP TABLE test_orders"); } + @Test + public void testReadNoColumns() + { + testWithAllStorageFormats(this::testReadNoColumns); + } + + private void testReadNoColumns(Session session, HiveStorageFormat storageFormat) + { + if (!insertOperationsSupported(storageFormat)) { + return; + } + assertUpdate(session, format("CREATE TABLE test_read_no_columns WITH (format = '%s') AS SELECT 0 x", storageFormat), 1); + assertQuery(session, "SELECT count(*) FROM test_read_no_columns", "SELECT 1"); + assertUpdate(session, "DROP TABLE test_read_no_columns"); + } + @Test public void createTableWithEveryType() { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestOrcBatchPageSourceMemoryTracking.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestOrcBatchPageSourceMemoryTracking.java index aa4b82fcaa036..29d7d6952727f 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestOrcBatchPageSourceMemoryTracking.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestOrcBatchPageSourceMemoryTracking.java @@ -59,15 +59,13 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.NullMemoryManager; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterOptions; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions; import org.apache.hadoop.hive.ql.io.orc.Writer; -import org.apache.hadoop.hive.ql.io.orc.WriterImpl; -import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.Serializer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -77,6 +75,8 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; +import org.apache.orc.NullMemoryManager; +import org.apache.orc.impl.WriterImpl; import org.joda.time.DateTimeZone; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -535,7 +535,7 @@ private DriverContext newDriverContext() public static FileSplit createTestFile(String filePath, HiveOutputFormat outputFormat, - @SuppressWarnings("deprecation") SerDe serDe, + Serializer serializer, String compressionCodec, List testColumns, int numRows, @@ -548,7 +548,7 @@ public static FileSplit createTestFile(String filePath, Properties tableProperties = new Properties(); tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName))); tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType))); - serDe.initialize(CONFIGURATION, tableProperties); + serializer.initialize(CONFIGURATION, tableProperties); JobConf jobConf = new JobConf(); if (compressionCodec != null) { @@ -577,7 +577,7 @@ public static FileSplit createTestFile(String filePath, objectInspector.setStructFieldData(row, 
fields.get(i), writeValue); } - Writable record = serDe.serialize(row, objectInspector); + Writable record = serializer.serialize(row, objectInspector); recordWriter.write(record); if (rowNumber % stripeRows == stripeRows - 1) { flushStripe(recordWriter); @@ -614,8 +614,8 @@ private static void flushStripe(RecordWriter recordWriter) private static RecordWriter createRecordWriter(Path target, Configuration conf) { try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader())) { - WriterOptions options = new OrcWriterOptions(conf) - .memory(new NullMemoryManager(conf)) + WriterOptions options = OrcFile.writerOptions(conf) + .memory(new NullMemoryManager()) .compress(ZLIB); try { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestParquetFileWriterConfig.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestParquetFileWriterConfig.java index c9c99e9d8be5b..51276274416ca 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestParquetFileWriterConfig.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestParquetFileWriterConfig.java @@ -15,8 +15,8 @@ import com.google.common.collect.ImmutableMap; import io.airlift.units.DataSize; +import org.apache.parquet.hadoop.ParquetWriter; import org.testng.annotations.Test; -import parquet.hadoop.ParquetWriter; import java.util.Map; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java index 7947b6398d4c1..fb6de72fac0cc 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/AbstractTestParquetReader.java @@ -40,14 +40,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import org.joda.time.DateTimeZone; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import parquet.hadoop.ParquetOutputFormat; -import parquet.hadoop.codec.CodecConfig; -import parquet.hadoop.metadata.CompressionCodecName; -import parquet.schema.MessageType; import java.math.BigDecimal; import java.math.BigInteger; @@ -119,14 +117,16 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector; +import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; import static org.testng.Assert.assertEquals; -import static parquet.schema.MessageTypeParser.parseMessageType; public abstract class AbstractTestParquetReader { private static final int MAX_PRECISION_INT32 = (int) maxPrecision(4); private static final int MAX_PRECISION_INT64 = (int) maxPrecision(8); + private Logger parquetLogger; + private final ParquetTester tester; public AbstractTestParquetReader(ParquetTester tester) @@ -138,7 +138,10 @@ public AbstractTestParquetReader(ParquetTester 
tester) public void setUp() { assertEquals(DateTimeZone.getDefault(), HIVE_STORAGE_TIME_ZONE); - setParquetLogging(); + + // Parquet has excessive logging at INFO level + parquetLogger = Logger.getLogger("org.apache.parquet.hadoop"); + parquetLogger.setLevel(Level.WARNING); } @Test @@ -1527,16 +1530,6 @@ protected T computeNext() }; } - // parquet has excessive logging at INFO level, set them to WARNING - private void setParquetLogging() - { - Logger.getLogger(ParquetOutputFormat.class.getName()).setLevel(Level.WARNING); - Logger.getLogger(CodecConfig.class.getName()).setLevel(Level.WARNING); - // these logging classes are not public, use class name directly - Logger.getLogger("parquet.hadoop.InternalParquetRecordWriter").setLevel(Level.WARNING); - Logger.getLogger("parquet.hadoop.ColumnChunkPageWriteStore").setLevel(Level.WARNING); - } - @Test public void testStructMaxReadBytes() throws Exception diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java index 39827b15ec23e..1c95bb555bddb 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java @@ -72,14 +72,14 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; +import org.apache.parquet.column.ParquetProperties.WriterVersion; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroupFactory; import org.apache.parquet.hadoop.example.ExampleParquetWriter; import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.schema.MessageType; import org.joda.time.DateTimeZone; -import parquet.column.ParquetProperties.WriterVersion; -import parquet.hadoop.metadata.CompressionCodecName; -import parquet.schema.MessageType; import java.io.Closeable; import java.io.File; @@ -133,17 +133,17 @@ import static java.util.Arrays.stream; import static java.util.Collections.singletonList; import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector; +import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0; +import static org.apache.parquet.hadoop.ParquetOutputFormat.COMPRESSION; +import static org.apache.parquet.hadoop.ParquetOutputFormat.ENABLE_DICTIONARY; +import static org.apache.parquet.hadoop.ParquetOutputFormat.WRITER_VERSION; +import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP; +import static org.apache.parquet.hadoop.metadata.CompressionCodecName.LZO; +import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY; +import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import static parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0; -import static parquet.hadoop.ParquetOutputFormat.COMPRESSION; -import static parquet.hadoop.ParquetOutputFormat.ENABLE_DICTIONARY; -import static parquet.hadoop.ParquetOutputFormat.WRITER_VERSION; -import static parquet.hadoop.metadata.CompressionCodecName.GZIP; -import static parquet.hadoop.metadata.CompressionCodecName.LZO; -import static parquet.hadoop.metadata.CompressionCodecName.SNAPPY; -import static 
parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED; public class ParquetTester { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/MapKeyValuesSchemaConverter.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/MapKeyValuesSchemaConverter.java index dbcf5d8de9ee0..520392304c2bd 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/MapKeyValuesSchemaConverter.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/MapKeyValuesSchemaConverter.java @@ -22,18 +22,18 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import parquet.schema.GroupType; -import parquet.schema.MessageType; -import parquet.schema.OriginalType; -import parquet.schema.PrimitiveType.PrimitiveTypeName; -import parquet.schema.Type; -import parquet.schema.Type.Repetition; -import parquet.schema.Types; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Type.Repetition; +import org.apache.parquet.schema.Types; import java.util.List; import java.util.Locale; -import static parquet.schema.OriginalType.MAP_KEY_VALUE; +import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE; /** * This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java index b6cb85ac5cca6..b19400b4db5a5 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java @@ -22,18 +22,18 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import parquet.schema.GroupType; -import parquet.schema.MessageType; -import parquet.schema.OriginalType; -import parquet.schema.PrimitiveType.PrimitiveTypeName; -import parquet.schema.Type; -import parquet.schema.Type.Repetition; -import parquet.schema.Types; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Type.Repetition; +import org.apache.parquet.schema.Types; import java.util.List; import java.util.Locale; -import static parquet.schema.OriginalType.MAP_KEY_VALUE; +import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE; /** * This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArraySchemaConverter.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArraySchemaConverter.java index 500b678e430ea..c49885cb2852d 100644 --- 
a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArraySchemaConverter.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/SingleLevelArraySchemaConverter.java @@ -22,14 +22,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import parquet.schema.ConversionPatterns; -import parquet.schema.GroupType; -import parquet.schema.MessageType; -import parquet.schema.OriginalType; -import parquet.schema.PrimitiveType.PrimitiveTypeName; -import parquet.schema.Type; -import parquet.schema.Type.Repetition; -import parquet.schema.Types; +import org.apache.parquet.schema.ConversionPatterns; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Type.Repetition; +import org.apache.parquet.schema.Types; import java.util.List; import java.util.Locale; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriteSupport.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriteSupport.java index c94f1d8657d39..66222c3f4e424 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriteSupport.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriteSupport.java @@ -15,13 +15,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; -import parquet.hadoop.api.WriteSupport; -import parquet.io.api.RecordConsumer; -import parquet.schema.MessageType; +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.io.api.RecordConsumer; +import org.apache.parquet.schema.MessageType; import java.util.HashMap; -import static parquet.schema.MessageTypeParser.parseMessageType; +import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; /** * This class is copied from org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriter.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriter.java index 639df16a07c03..a9ab713fb8591 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriter.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestDataWritableWriter.java @@ -40,11 +40,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import parquet.io.api.Binary; -import parquet.io.api.RecordConsumer; -import parquet.schema.GroupType; -import parquet.schema.OriginalType; -import parquet.schema.Type; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.io.api.RecordConsumer; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.Type; import java.sql.Date; import java.sql.Timestamp; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestMapredParquetOutputFormat.java 
b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestMapredParquetOutputFormat.java index 6757a702ee463..941e05e05a0d2 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestMapredParquetOutputFormat.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/write/TestMapredParquetOutputFormat.java @@ -20,8 +20,8 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.Progressable; -import parquet.hadoop.ParquetOutputFormat; -import parquet.schema.MessageType; +import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.apache.parquet.schema.MessageType; import java.io.IOException; import java.util.Optional; diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainOrcPredicate.java b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainOrcPredicate.java index 1fb00d3aa93c6..ce53e37f9784e 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainOrcPredicate.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainOrcPredicate.java @@ -22,6 +22,7 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarbinaryType; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.orc.metadata.statistics.BloomFilter; import com.facebook.presto.orc.metadata.statistics.BooleanStatistics; import com.facebook.presto.orc.metadata.statistics.ColumnStatistics; import com.facebook.presto.orc.metadata.statistics.HiveBloomFilter; @@ -29,7 +30,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; -import org.apache.hive.common.util.BloomFilter; import java.util.Collection; import java.util.List; diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/BloomFilter.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/BloomFilter.java new file mode 100644 index 0000000000000..34b6924fd1611 --- /dev/null +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/BloomFilter.java @@ -0,0 +1,324 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.facebook.presto.orc.metadata.statistics;
+
+import org.apache.hive.common.util.Murmur3;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+public class BloomFilter
+{
+    protected BitSet bitSet;
+    protected int numBits;
+    protected int numHashFunctions;
+    public static final int START_OF_SERIALIZED_LONGS = 5;
+
+    public BloomFilter()
+    {
+    }
+
+    static void checkArgument(boolean expression, String message)
+    {
+        if (!expression) {
+            throw new IllegalArgumentException(message);
+        }
+    }
+
+    public BloomFilter(long expectedEntries, double fpp)
+    {
+        checkArgument(expectedEntries > 0L, "expectedEntries should be > 0");
+        checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");
+        int nb = optimalNumOfBits(expectedEntries, fpp);
+        this.numBits = nb + (64 - nb % 64);
+        this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, this.numBits);
+        this.bitSet = new BitSet(this.numBits);
+    }
+
+    static int optimalNumOfHashFunctions(long n, long m)
+    {
+        return Math.max(1, (int) Math.round((double) m / (double) n * Math.log(2.0)));
+    }
+
+    static int optimalNumOfBits(long n, double p)
+    {
+        return (int) ((double) (-n) * Math.log(p) / (Math.log(2.0) * Math.log(2.0)));
+    }
+
+    public void add(byte[] val)
+    {
+        if (val == null) {
+            this.addBytes(val, -1, -1);
+        }
+        else {
+            this.addBytes(val, 0, val.length);
+        }
+    }
+
+    public void addBytes(byte[] val, int offset, int length)
+    {
+        long hash64 = val == null ? 2862933555777941757L : Murmur3.hash64(val, offset, length);
+        this.addHash(hash64);
+    }
+
+    private void addHash(long hash64)
+    {
+        int hash1 = (int) hash64;
+        int hash2 = (int) (hash64 >>> 32);
+
+        for (int i = 1; i <= this.numHashFunctions; ++i) {
+            int combinedHash = hash1 + i * hash2;
+            if (combinedHash < 0) {
+                combinedHash = ~combinedHash;
+            }
+
+            int pos = combinedHash % this.numBits;
+            this.bitSet.set(pos);
+        }
+    }
+
+    public void addString(String val)
+    {
+        if (val == null) {
+            this.add(null);
+        }
+        else {
+            this.add(val.getBytes());
+        }
+    }
+
+    public void addLong(long val)
+    {
+        this.addHash(this.getLongHash(val));
+    }
+
+    public void addDouble(double val)
+    {
+        this.addLong(Double.doubleToLongBits(val));
+    }
+
+    public boolean test(byte[] val)
+    {
+        return val == null ? this.testBytes(val, -1, -1) : this.testBytes(val, 0, val.length);
+    }
+
+    public boolean testBytes(byte[] val, int offset, int length)
+    {
+        long hash64 = val == null ? 2862933555777941757L : Murmur3.hash64(val, offset, length);
+        return this.testHash(hash64);
+    }
+
+    private boolean testHash(long hash64)
+    {
+        int hash1 = (int) hash64;
+        int hash2 = (int) (hash64 >>> 32);
+
+        for (int i = 1; i <= this.numHashFunctions; ++i) {
+            int combinedHash = hash1 + i * hash2;
+            if (combinedHash < 0) {
+                combinedHash = ~combinedHash;
+            }
+
+            int pos = combinedHash % this.numBits;
+            if (!this.bitSet.get(pos)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    public boolean testString(String val)
+    {
+        return val == null ? this.test(null) : this.test(val.getBytes());
+    }
+
+    public boolean testLong(long val)
+    {
+        return this.testHash(this.getLongHash(val));
+    }
+
+    private long getLongHash(long key)
+    {
+        key = ~key + (key << 21);
+        key ^= key >> 24;
+        key = key + (key << 3) + (key << 8);
+        key ^= key >> 14;
+        key = key + (key << 2) + (key << 4);
+        key ^= key >> 28;
+        key += key << 31;
+        return key;
+    }
+
+    public boolean testDouble(double val)
+    {
+        return this.testLong(Double.doubleToLongBits(val));
+    }
+
+    public long sizeInBytes()
+    {
+        return this.getBitSize() / 8;
+    }
+
+    public int getBitSize()
+    {
+        return this.bitSet.getData().length * 64;
+    }
+
+    public int getNumHashFunctions()
+    {
+        return this.numHashFunctions;
+    }
+
+    public long[] getBitSet()
+    {
+        return this.bitSet.getData();
+    }
+
+    public String toString()
+    {
+        return "m: " + this.numBits + " k: " + this.numHashFunctions;
+    }
+
+    public void merge(BloomFilter that)
+    {
+        if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) {
+            this.bitSet.putAll(that.bitSet);
+        }
+        else {
+            throw new IllegalArgumentException("BloomFilters are not compatible for merging. this - " + this.toString() + " that - " + that.toString());
+        }
+    }
+
+    public void reset()
+    {
+        this.bitSet.clear();
+    }
+
+    public static void serialize(OutputStream out, BloomFilter bloomFilter) throws IOException
+    {
+        DataOutputStream dataOutputStream = new DataOutputStream(out);
+        dataOutputStream.writeByte(bloomFilter.numHashFunctions);
+        dataOutputStream.writeInt(bloomFilter.getBitSet().length);
+        long[] bits = bloomFilter.getBitSet();
+        int length = bits.length;
+
+        for (int i = 0; i < length; ++i) {
+            long value = bits[i];
+            dataOutputStream.writeLong(value);
+        }
+    }
+
+    public static org.apache.hive.common.util.BloomFilter deserialize(InputStream in) throws IOException
+    {
+        if (in == null) {
+            throw new IOException("Input stream is null");
+        }
+        else {
+            try {
+                DataInputStream dataInputStream = new DataInputStream(in);
+                int numHashFunc = dataInputStream.readByte();
+                int numLongs = dataInputStream.readInt();
+                long[] data = new long[numLongs];
+
+                for (int i = 0; i < numLongs; ++i) {
+                    data[i] = dataInputStream.readLong();
+                }
+
+                return new org.apache.hive.common.util.BloomFilter(data, numHashFunc);
+            }
+            catch (RuntimeException e) {
+                IOException io = new IOException("Unable to deserialize BloomFilter");
+                io.initCause(e);
+                throw io;
+            }
+        }
+    }
+
+    public static void mergeBloomFilterBytes(byte[] bf1Bytes, int bf1Start, int bf1Length, byte[] bf2Bytes, int bf2Start, int bf2Length)
+    {
+        if (bf1Length != bf2Length) {
+            throw new IllegalArgumentException("bf1Length " + bf1Length + " does not match bf2Length " + bf2Length);
+        }
+        else {
+            int idx;
+            for (idx = 0; idx < START_OF_SERIALIZED_LONGS; ++idx) {
+                if (bf1Bytes[bf1Start + idx] != bf2Bytes[bf2Start + idx]) {
+                    throw new IllegalArgumentException("bf1 NumHashFunctions/NumBits does not match bf2");
+                }
+            }
+
+            for (idx = START_OF_SERIALIZED_LONGS; idx < bf1Length; ++idx) {
+                bf1Bytes[bf1Start + idx] |= bf2Bytes[bf2Start + idx];
+            }
+        }
+    }
+
+    public class BitSet
+    {
+        private final long[] data;
+
+        public BitSet(long bits)
+        {
+            this(new long[(int) Math.ceil((double) bits / 64.0)]);
+        }
+
+        public BitSet(long[] data)
+        {
+            assert data.length > 0 : "data length is zero!";
+
+            this.data = data;
+        }
+
+        public void set(int index)
+        {
+            long[] bits = this.data;
+            bits[index >>> 6] |= 1L << index;
+        }
+
+        public boolean get(int index)
+        {
+            return (this.data[index >>> 6] & 1L << index) != 0L;
+        }
+
+        public long bitSize()
+        {
+            return (long) this.data.length * 64L;
+        }
+
+        public long[] getData()
+        {
+            return this.data;
+        }
+
+        public void putAll(BitSet array)
+        {
+            assert this.data.length == array.data.length : "BitArrays must be of equal length (" + this.data.length + " != " + array.data.length + ")";
+
+            for (int i = 0; i < this.data.length; ++i) {
+                long[] bits = this.data;
+                bits[i] |= array.data[i];
+            }
+        }
+
+        public void clear()
+        {
+            Arrays.fill(this.data, 0L);
+        }
+    }
+}
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/HiveBloomFilter.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/HiveBloomFilter.java
index 67ef07f108e39..a75cfcec3a635 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/HiveBloomFilter.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/statistics/HiveBloomFilter.java
@@ -15,7 +15,6 @@
 import com.facebook.presto.orc.metadata.statistics.StatisticsHasher.Hashable;
 import com.google.common.primitives.Longs;
-import org.apache.hive.common.util.BloomFilter;
 import org.openjdk.jol.info.ClassLayout;
 
 import java.util.Arrays;
 
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java b/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java
index 22658b31a8eeb..e7811a287b897 100644
--- a/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java
+++ b/presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java
@@ -13,7 +13,6 @@
  */
 package com.facebook.presto.orc;
 
-import com.facebook.hive.orc.OrcConf;
 import com.facebook.hive.orc.lazy.OrcLazyObject;
 import com.facebook.presto.common.Page;
 import com.facebook.presto.common.Subfield;
@@ -91,6 +90,7 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.orc.OrcConf;
 import org.joda.time.DateTimeZone;
 
 import java.io.File;
@@ -120,6 +120,10 @@
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
+import static com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY;
+import static com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_COMPRESSION;
+import static com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL;
+import static com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD;
 import static com.facebook.presto.common.type.BigintType.BIGINT;
 import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
 import static com.facebook.presto.common.type.Chars.truncateToLengthAndTrimSpaces;
@@ -202,7 +206,6 @@ public enum Format
     {
         ORC_12(OrcEncoding.ORC) {
             @Override
-            @SuppressWarnings("deprecation")
             public Serializer createSerializer()
             {
                 return new OrcSerde();
@@ -210,7 +213,6 @@ public Serializer createSerializer()
         },
         ORC_11(OrcEncoding.ORC) {
             @Override
-            @SuppressWarnings("deprecation")
             public Serializer createSerializer()
             {
                 return new OrcSerde();
@@ -224,7 +226,6 @@ public boolean supportsType(Type type)
             }
 
             @Override
-            @SuppressWarnings("deprecation")
             public Serializer createSerializer()
             {
                 return new com.facebook.hive.orc.OrcSerde();
@@ -248,7 +249,6 @@ public boolean supportsType(Type type)
             return true;
         }
 
-        @SuppressWarnings("deprecation")
        public abstract Serializer createSerializer();
    }
 
@@ -1962,7 +1962,7 @@ private static DataSize writeOrcFileColumnHive(File outputFile, Format format, R
 
         Object row = objectInspector.create();
         List fields = 
-        @SuppressWarnings("deprecation") Serializer serializer = format.createSerializer();
+        Serializer serializer = format.createSerializer();
 
         for (int i = 0; i < values.get(0).size(); i++) {
             for (int j = 0; j < types.size(); j++) {
@@ -2204,8 +2204,8 @@ static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compre
             throws IOException
     {
         JobConf jobConf = new JobConf();
-        jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
-        jobConf.set("hive.exec.orc.default.compress", compression.name());
+        OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
+        OrcConf.COMPRESS.setString(jobConf, compression.name());
 
         return new OrcOutputFormat().getHiveRecordWriter(
                 jobConf,
@@ -2220,11 +2220,10 @@ private static RecordWriter createDwrfRecordWriter(File outputFile, CompressionK
             throws IOException
     {
         JobConf jobConf = new JobConf();
-        jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
-        jobConf.set("hive.exec.orc.compress", compressionCodec.name());
-        OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
-        OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
-        OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
+        com.facebook.hive.orc.OrcConf.setVar(jobConf, HIVE_ORC_COMPRESSION, compressionCodec.name());
+        com.facebook.hive.orc.OrcConf.setIntVar(jobConf, HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
+        com.facebook.hive.orc.OrcConf.setIntVar(jobConf, HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
+        com.facebook.hive.orc.OrcConf.setBoolVar(jobConf, HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
 
         return new com.facebook.hive.orc.OrcOutputFormat().getHiveRecordWriter(
                 jobConf,
@@ -2254,12 +2253,12 @@ private static Properties createTableProperties(List<Type> types)
         String columnTypes = types.stream()
                 .map(OrcTester::getJavaObjectInspector)
                 .map(ObjectInspector::getTypeName)
-                .collect(Collectors.joining(", "));
+                .collect(Collectors.joining(","));
 
         Properties orderTableProperties = new Properties();
-        orderTableProperties.setProperty("columns", String.join(", ", makeColumnNames(types.size())));
+        orderTableProperties.setProperty("columns", String.join(",", makeColumnNames(types.size())));
         orderTableProperties.setProperty("columns.types", columnTypes);
-        orderTableProperties.setProperty("orc.bloom.filter.columns", String.join(", ", makeColumnNames(types.size())));
+        orderTableProperties.setProperty("orc.bloom.filter.columns", String.join(",", makeColumnNames(types.size())));
         orderTableProperties.setProperty("orc.bloom.filter.fpp", "0.50");
         orderTableProperties.setProperty("orc.bloom.filter.write.version", "original");
         return orderTableProperties;
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestCachingOrcDataSource.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestCachingOrcDataSource.java
index 3b76647f66bbd..1d06143b176b0 100644
--- a/presto-orc/src/test/java/com/facebook/presto/orc/TestCachingOrcDataSource.java
+++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestCachingOrcDataSource.java
@@ -24,10 +24,12 @@
 import io.airlift.units.DataSize.Unit;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.orc.OrcConf;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -281,13 +283,13 @@ private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFi
             throws IOException
     {
         JobConf jobConf = new JobConf();
-        jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
-        jobConf.set("hive.exec.orc.default.compress", compression.name());
+        OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
+        OrcConf.COMPRESS.setString(jobConf, compression.name());
 
         Properties tableProperties = new Properties();
-        tableProperties.setProperty("columns", "test");
-        tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
-        tableProperties.setProperty("orc.stripe.size", "1200000");
+        tableProperties.setProperty(IOConstants.COLUMNS, "test");
+        tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
+        tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");
 
         return new OrcOutputFormat().getHiveRecordWriter(
                 jobConf,
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcBloomFilters.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcBloomFilters.java
index 65b4f8d48f8a8..8da27f39fbbf6 100644
--- a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcBloomFilters.java
+++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcBloomFilters.java
@@ -18,6 +18,7 @@
 import com.facebook.presto.common.type.Type;
 import com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference;
 import com.facebook.presto.orc.metadata.OrcMetadataReader;
+import com.facebook.presto.orc.metadata.statistics.BloomFilter;
 import com.facebook.presto.orc.metadata.statistics.ColumnStatistics;
 import com.facebook.presto.orc.metadata.statistics.HiveBloomFilter;
 import com.facebook.presto.orc.metadata.statistics.IntegerStatistics;
@@ -27,7 +28,6 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.primitives.Longs;
 import io.airlift.slice.Slice;
-import org.apache.hive.common.util.BloomFilter;
 import org.testng.annotations.Test;
 
 import java.io.ByteArrayInputStream;
diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcReaderPositions.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcReaderPositions.java
index 66fa5ad492f3a..b7c00e89a2969 100644
--- a/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcReaderPositions.java
+++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestOrcReaderPositions.java
@@ -26,17 +26,16 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.io.orc.NullMemoryManager;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
-import org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions;
 import org.apache.hadoop.hive.ql.io.orc.Writer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.Serializer;
 import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.io.Writable;
+import org.apache.orc.NullMemoryManager;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -423,7 +422,7 @@ private static void createMultiStripeFile(File file)
     {
         FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, ImmutableList.of(BIGINT, VARCHAR));
 
-        @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
+        Serializer serde = new OrcSerde();
         SettableStructObjectInspector objectInspector = createSettableStructObjectInspector(ImmutableList.of(BIGINT, VARCHAR));
         Object row = objectInspector.create();
         StructField bigintField = objectInspector.getAllStructFieldRefs().get(0);
@@ -447,8 +446,8 @@ private static void createFileWithOnlyUserMetadata(File file, Map<String, Slice>
diff --git a/presto-parquet/pom.xml b/presto-parquet/pom.xml
--- a/presto-parquet/pom.xml
+++ b/presto-parquet/pom.xml
             <scope>runtime</scope>
         </dependency>
 
-        <dependency>
-            <groupId>org.apache.parquet</groupId>
-            <artifactId>parquet-column</artifactId>
-            <version>${dep.parquet.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.parquet</groupId>
-            <artifactId>parquet-common</artifactId>
-            <version>${dep.parquet.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.parquet</groupId>
-            <artifactId>parquet-hadoop</artifactId>
-            <version>${dep.parquet.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.xerial.snappy</groupId>
-                    <artifactId>snappy-java</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.parquet</groupId>
-            <artifactId>parquet-encoding</artifactId>
-            <version>${dep.parquet.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.parquet</groupId>
-            <artifactId>parquet-format</artifactId>
-            <version>2.6.0</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-api</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.xerial.snappy</groupId>
-                    <artifactId>snappy-java</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
         <dependency>
             <groupId>com.google.code.findbugs</groupId>
             <artifactId>jsr305</artifactId>
diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java
index d0e074bf9d565..f30eb1961b52d 100644
--- a/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java
+++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java
@@ -167,6 +167,7 @@ public static int getFieldIndex(MessageType fileSchema, String name)
         }
     }
 
+    @SuppressWarnings("deprecation")
     public static ParquetEncoding getParquetEncoding(Encoding encoding)
     {
         switch (encoding) {
diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java
index fdfc9c8909b90..fa4734f36b8d3 100644
--- a/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java
+++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java
@@ -21,7 +21,7 @@
 
 import static com.google.common.base.MoreObjects.toStringHelper;
 import static com.google.common.base.Preconditions.checkArgument;
-import static parquet.bytes.BytesUtils.readIntLittleEndian;
+import static org.apache.parquet.bytes.BytesUtils.readIntLittleEndian;
 
 public class BinaryDictionary
         extends Dictionary
diff --git a/presto-parquet/src/main/java/org/apache/parquet/io/ColumnIOConverter.java b/presto-parquet/src/main/java/org/apache/parquet/io/ColumnIOConverter.java
index 30416a6dbdee0..5dc23fe448ad8 100644
--- a/presto-parquet/src/main/java/org/apache/parquet/io/ColumnIOConverter.java
+++ b/presto-parquet/src/main/java/org/apache/parquet/io/ColumnIOConverter.java
@@ -34,6 +34,8 @@
 import static com.facebook.presto.parquet.ParquetTypeUtils.getArrayElementColumn;
 import static com.facebook.presto.parquet.ParquetTypeUtils.getMapKeyValueColumn;
 import static com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName;
+import static org.apache.parquet.io.ColumnIOUtil.columnDefinitionLevel;
+import static org.apache.parquet.io.ColumnIOUtil.columnRepetitionLevel;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 
 /**
@@ -51,8 +53,8 @@ public static Optional<Field> constructField(Type type, ColumnIO columnIO)
             return Optional.empty();
         }
         boolean required = columnIO.getType().getRepetition() != OPTIONAL;
-        int repetitionLevel = columnIO.getRepetitionLevel();
-        int definitionLevel = columnIO.getDefinitionLevel();
+        int repetitionLevel = columnRepetitionLevel(columnIO);
+        int definitionLevel = columnDefinitionLevel(columnIO);
         if (ROW.equals(type.getTypeSignature().getBase())) {
             GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO;
             List<Type> parameters = type.getTypeParameters();
diff --git a/presto-parquet/src/test/java/com/facebook/presto/parquet/ParquetTestUtils.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/ParquetTestUtils.java
index c4d37c9602f80..484d443bfce69 100644
--- a/presto-parquet/src/test/java/com/facebook/presto/parquet/ParquetTestUtils.java
+++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/ParquetTestUtils.java
@@ -61,11 +61,11 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.Progressable;
-import parquet.hadoop.ParquetOutputFormat;
-import parquet.schema.GroupType;
-import parquet.schema.MessageType;
-import parquet.schema.PrimitiveType;
-import parquet.schema.Types;
+import org.apache.parquet.hadoop.ParquetOutputFormat;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
 
 import java.io.File;
 import java.io.IOException;
@@ -113,8 +113,8 @@
 import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableStringObjectInspector;
 import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
 import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo;
-import static parquet.schema.Type.Repetition.REPEATED;
-import static parquet.schema.Type.Repetition.REQUIRED;
+import static org.apache.parquet.schema.Type.Repetition.REPEATED;
+import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
 
 public class ParquetTestUtils
 {
@@ -406,10 +406,10 @@ public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath, Clas
                 if (!nullable) {
                     // Parquet writer in Hive by default writes all columns as NULLABLE. We want to change it to write NON-NULLABLE
                     // type by changing the MessageType (parquet schema type)
-                    List<parquet.schema.Type> types = messageType.getFields();
-                    List<parquet.schema.Type> newTypes = new ArrayList<>();
+                    List<org.apache.parquet.schema.Type> types = messageType.getFields();
+                    List<org.apache.parquet.schema.Type> newTypes = new ArrayList<>();
 
-                    for (parquet.schema.Type type : types) {
+                    for (org.apache.parquet.schema.Type type : types) {
                         newTypes.add(convertToRequiredType(type));
                     }
                     messageType = new MessageType("hive_schema", newTypes);
@@ -422,13 +422,13 @@ public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath, Clas
         }.getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
     }
 
-    private static parquet.schema.Type convertToRequiredType(parquet.schema.Type type)
+    private static org.apache.parquet.schema.Type convertToRequiredType(org.apache.parquet.schema.Type type)
     {
         if (type instanceof GroupType) {
             GroupType groupType = (GroupType) type;
-            List<parquet.schema.Type> fields = groupType.getFields();
-            List<parquet.schema.Type> newFields = new ArrayList<>();
-            for (parquet.schema.Type field : fields) {
+            List<org.apache.parquet.schema.Type> fields = groupType.getFields();
+            List<org.apache.parquet.schema.Type> newFields = new ArrayList<>();
+            for (org.apache.parquet.schema.Type field : fields) {
                 newFields.add(convertToRequiredType(field));
             }
             return new GroupType(REPEATED, groupType.getName(), newFields);
@@ -438,7 +438,7 @@ else if (type instanceof PrimitiveType) {
             Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(primitiveType.getPrimitiveTypeName(), REQUIRED);
 
             if (primitiveType.getDecimalMetadata() != null) {
-                builder = builder.scale(primitiveType.getDecimalMetadata().getScale())
+                builder = (Types.PrimitiveBuilder<PrimitiveType>) builder.scale(primitiveType.getDecimalMetadata().getScale())
                         .precision(primitiveType.getDecimalMetadata().getPrecision());
             }
 
diff --git a/presto-rcfile/src/test/java/com/facebook/presto/rcfile/RcFileTester.java b/presto-rcfile/src/test/java/com/facebook/presto/rcfile/RcFileTester.java
index 51713c0ea9503..d2b2abfd818a2 100644
--- a/presto-rcfile/src/test/java/com/facebook/presto/rcfile/RcFileTester.java
+++ b/presto-rcfile/src/test/java/com/facebook/presto/rcfile/RcFileTester.java
@@ -197,7 +197,6 @@ public enum Format
     {
         BINARY {
             @Override
-            @SuppressWarnings("deprecation")
             public Serializer createSerializer()
             {
                 return new LazyBinaryColumnarSerDe();
@@ -212,7 +211,6 @@ public RcFileEncoding getVectorEncoding()
 
         TEXT {
             @Override
-            @SuppressWarnings("deprecation")
             public Serializer createSerializer()
             {
                 try {
@@ -235,7 +233,6 @@ public RcFileEncoding getVectorEncoding()
             }
         };
 
-        @SuppressWarnings("deprecation")
         public abstract Serializer createSerializer();
 
         public abstract RcFileEncoding getVectorEncoding();
 
@@ -776,7 +773,6 @@ private static void assertFileContentsOld
         schema.setProperty(META_TABLE_COLUMNS, "test");
         schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
 
-        @SuppressWarnings("deprecation")
         Deserializer deserializer;
         if (format == Format.BINARY) {
             deserializer = new LazyBinaryColumnarSerDe();
@@ -925,7 +921,7 @@ private static DataSize writeRcFileColumnOld(File outputFile, Format format, Com
         Object row = objectInspector.create();
         List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
-        @SuppressWarnings("deprecation") Serializer serializer = format.createSerializer();
+        Serializer serializer = format.createSerializer();
 
         Properties tableProperties = new Properties();
         tableProperties.setProperty("columns", "test");