diff --git a/pom.xml b/pom.xml index 79535eef48ec7..46d11c75a7e64 100644 --- a/pom.xml +++ b/pom.xml @@ -77,6 +77,7 @@ presto-blackhole presto-memory presto-orc + presto-parquet presto-rcfile presto-hive presto-hive-hadoop2 @@ -187,6 +188,12 @@ ${project.version} + + com.facebook.presto + presto-parquet + ${project.version} + + com.facebook.presto presto-rcfile diff --git a/presto-hive/pom.xml b/presto-hive/pom.xml index 94218cba71ffb..bc50cf560bb63 100644 --- a/presto-hive/pom.xml +++ b/presto-hive/pom.xml @@ -32,6 +32,11 @@ presto-orc + + com.facebook.presto + presto-parquet + + com.facebook.presto presto-memory-context diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java index 3ffb68b52dcba..acf5e233c4e52 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java @@ -101,8 +101,6 @@ public class HiveClientConfig private DataSize textMaxLineLength = new DataSize(100, MEGABYTE); private boolean useParquetColumnNames; - private boolean parquetOptimizedReaderEnabled = true; - private boolean parquetPredicatePushdownEnabled = true; private boolean assumeCanonicalPartitionKeys; @@ -670,34 +668,6 @@ public HiveClientConfig setVerifyChecksum(boolean verifyChecksum) return this; } - @Deprecated - public boolean isParquetPredicatePushdownEnabled() - { - return parquetPredicatePushdownEnabled; - } - - @Deprecated - @Config("hive.parquet-predicate-pushdown.enabled") - public HiveClientConfig setParquetPredicatePushdownEnabled(boolean parquetPredicatePushdownEnabled) - { - this.parquetPredicatePushdownEnabled = parquetPredicatePushdownEnabled; - return this; - } - - @Deprecated - public boolean isParquetOptimizedReaderEnabled() - { - return parquetOptimizedReaderEnabled; - } - - @Deprecated - @Config("hive.parquet-optimized-reader.enabled") - public 
HiveClientConfig setParquetOptimizedReaderEnabled(boolean parquetOptimizedReaderEnabled) - { - this.parquetOptimizedReaderEnabled = parquetOptimizedReaderEnabled; - return this; - } - public boolean isUseOrcColumnNames() { return useOrcColumnNames; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java index 559bd5ea31c80..29004d5a11f4b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java @@ -17,7 +17,6 @@ import com.facebook.presto.hive.orc.DwrfPageSourceFactory; import com.facebook.presto.hive.orc.OrcPageSourceFactory; import com.facebook.presto.hive.parquet.ParquetPageSourceFactory; -import com.facebook.presto.hive.parquet.ParquetRecordCursorProvider; import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory; import com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider; import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; @@ -76,7 +75,6 @@ public void configure(Binder binder) newExporter(binder).export(NamenodeStats.class).as(generatedNameOf(NamenodeStats.class, connectorId)); Multibinder recordCursorProviderBinder = newSetBinder(binder, HiveRecordCursorProvider.class); - recordCursorProviderBinder.addBinding().to(ParquetRecordCursorProvider.class).in(Scopes.SINGLETON); recordCursorProviderBinder.addBinding().to(GenericHiveRecordCursorProvider.class).in(Scopes.SINGLETON); binder.bind(HiveWriterStats.class).in(Scopes.SINGLETON); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java index fd6bdb7f41c8f..e7850d745663b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java @@ -61,8 
+61,6 @@ public final class HiveSessionProperties private static final String ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY = "orc_optimized_writer_max_dictionary_memory"; private static final String HIVE_STORAGE_FORMAT = "hive_storage_format"; private static final String RESPECT_TABLE_FORMAT = "respect_table_format"; - private static final String PARQUET_PREDICATE_PUSHDOWN_ENABLED = "parquet_predicate_pushdown_enabled"; - private static final String PARQUET_OPTIMIZED_READER_ENABLED = "parquet_optimized_reader_enabled"; private static final String PARQUET_USE_COLUMN_NAME = "parquet_use_column_names"; private static final String PARQUET_WRITER_BLOCK_SIZE = "parquet_writer_block_size"; private static final String PARQUET_WRITER_PAGE_SIZE = "parquet_writer_page_size"; @@ -221,16 +219,6 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon "Write new partitions using table format rather than default storage format", hiveClientConfig.isRespectTableFormat(), false), - booleanProperty( - PARQUET_OPTIMIZED_READER_ENABLED, - "Experimental: Parquet: Enable optimized reader", - hiveClientConfig.isParquetOptimizedReaderEnabled(), - false), - booleanProperty( - PARQUET_PREDICATE_PUSHDOWN_ENABLED, - "Experimental: Parquet: Enable predicate pushdown for Parquet", - hiveClientConfig.isParquetPredicatePushdownEnabled(), - false), booleanProperty( PARQUET_USE_COLUMN_NAME, "Experimental: Parquet: Access Parquet columns using names from the file", @@ -313,11 +301,6 @@ public static InsertExistingPartitionsBehavior getInsertExistingPartitionsBehavi return session.getProperty(INSERT_EXISTING_PARTITIONS_BEHAVIOR, InsertExistingPartitionsBehavior.class); } - public static boolean isParquetOptimizedReaderEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_OPTIMIZED_READER_ENABLED, Boolean.class); - } - public static boolean isOrcBloomFiltersEnabled(ConnectorSession session) { return session.getProperty(ORC_BLOOM_FILTERS_ENABLED, 
Boolean.class); @@ -415,11 +398,6 @@ public static boolean isRespectTableFormat(ConnectorSession session) return session.getProperty(RESPECT_TABLE_FORMAT, Boolean.class); } - public static boolean isParquetPredicatePushdownEnabled(ConnectorSession session) - { - return session.getProperty(PARQUET_PREDICATE_PUSHDOWN_ENABLED, Boolean.class); - } - public static boolean isUseParquetColumnNames(ConnectorSession session) { return session.getProperty(PARQUET_USE_COLUMN_NAME, Boolean.class); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/HdfsParquetDataSource.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/HdfsParquetDataSource.java index fdd53230bc9b1..04be3dd10922b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/HdfsParquetDataSource.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/HdfsParquetDataSource.java @@ -14,6 +14,7 @@ package com.facebook.presto.hive.parquet; import com.facebook.presto.hive.FileFormatDataSourceStats; +import com.facebook.presto.parquet.ParquetDataSource; import com.facebook.presto.spi.PrestoException; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java deleted file mode 100644 index ce9b3f2bad52c..0000000000000 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java +++ /dev/null @@ -1,1375 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.hive.parquet; - -import com.facebook.presto.hive.FileFormatDataSourceStats; -import com.facebook.presto.hive.HdfsEnvironment; -import com.facebook.presto.hive.HiveColumnHandle; -import com.facebook.presto.hive.parquet.predicate.ParquetPredicate; -import com.facebook.presto.hive.util.DecimalUtils; -import com.facebook.presto.spi.PrestoException; -import com.facebook.presto.spi.RecordCursor; -import com.facebook.presto.spi.block.Block; -import com.facebook.presto.spi.block.BlockBuilder; -import com.facebook.presto.spi.predicate.TupleDomain; -import com.facebook.presto.spi.type.DecimalType; -import com.facebook.presto.spi.type.Decimals; -import com.facebook.presto.spi.type.Type; -import com.facebook.presto.spi.type.TypeManager; -import com.google.common.collect.ImmutableList; -import io.airlift.slice.Slice; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import parquet.column.ColumnDescriptor; -import parquet.column.Dictionary; -import parquet.hadoop.ParquetFileReader; -import parquet.hadoop.ParquetInputSplit; -import parquet.hadoop.ParquetRecordReader; -import parquet.hadoop.api.ReadSupport; -import parquet.hadoop.metadata.BlockMetaData; -import parquet.hadoop.metadata.FileMetaData; -import parquet.hadoop.metadata.ParquetMetadata; -import parquet.hadoop.util.ContextUtil; -import 
parquet.io.api.Binary; -import parquet.io.api.Converter; -import parquet.io.api.GroupConverter; -import parquet.io.api.PrimitiveConverter; -import parquet.io.api.RecordMaterializer; -import parquet.schema.DecimalMetadata; -import parquet.schema.GroupType; -import parquet.schema.MessageType; -import parquet.schema.PrimitiveType; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.math.BigInteger; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Properties; - -import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; -import static com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT; -import static com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR; -import static com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA; -import static com.facebook.presto.hive.HiveUtil.closeWithSuppression; -import static com.facebook.presto.hive.HiveUtil.getDecimalType; -import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getDescriptors; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.getParquetTupleDomain; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches; -import static com.facebook.presto.spi.type.Chars.isCharType; -import static com.facebook.presto.spi.type.Chars.truncateToLengthAndTrimSpaces; -import static com.facebook.presto.spi.type.DecimalType.createDecimalType; -import static com.facebook.presto.spi.type.StandardTypes.ARRAY; -import static com.facebook.presto.spi.type.StandardTypes.MAP; -import static com.facebook.presto.spi.type.StandardTypes.ROW; 
-import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; -import static com.facebook.presto.spi.type.Varchars.isVarcharType; -import static com.facebook.presto.spi.type.Varchars.truncateToLength; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Throwables.throwIfInstanceOf; -import static io.airlift.slice.Slices.wrappedBuffer; -import static java.lang.Float.floatToRawIntBits; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; -import static java.util.stream.Collectors.toList; -import static parquet.format.converter.ParquetMetadataConverter.NO_FILTER; -import static parquet.schema.OriginalType.DECIMAL; -import static parquet.schema.OriginalType.MAP_KEY_VALUE; - -public class ParquetHiveRecordCursor - implements RecordCursor -{ - private final ParquetRecordReader recordReader; - - private final Type[] types; - - private final boolean[] booleans; - private final long[] longs; - private final double[] doubles; - private final Slice[] slices; - private final Object[] objects; - private final boolean[] nulls; - - private final long totalBytes; - private long completedBytes; - private boolean closed; - - private final FileFormatDataSourceStats stats; - - public ParquetHiveRecordCursor( - HdfsEnvironment hdfsEnvironment, - String sessionUser, - Configuration configuration, - Path path, - long start, - long length, - long fileSize, - Properties splitSchema, - List columns, - boolean useParquetColumnNames, - TypeManager typeManager, - boolean predicatePushdownEnabled, - TupleDomain effectivePredicate, - FileFormatDataSourceStats stats) - { - requireNonNull(path, "path is null"); - checkArgument(length >= 0, "length is negative"); - requireNonNull(splitSchema, "splitSchema is null"); - requireNonNull(columns, "columns is null"); - 
this.stats = requireNonNull(stats, "stats is null"); - - this.totalBytes = length; - - int size = columns.size(); - - this.types = new Type[size]; - - this.booleans = new boolean[size]; - this.longs = new long[size]; - this.doubles = new double[size]; - this.slices = new Slice[size]; - this.objects = new Object[size]; - this.nulls = new boolean[size]; - - for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { - HiveColumnHandle column = columns.get(columnIndex); - checkState(column.getColumnType() == REGULAR, "column type must be regular"); - - types[columnIndex] = typeManager.getType(column.getTypeSignature()); - } - - this.recordReader = createParquetRecordReader( - hdfsEnvironment, - sessionUser, - configuration, - path, - start, - length, - fileSize, - columns, - useParquetColumnNames, - predicatePushdownEnabled, - effectivePredicate); - } - - @Override - public long getCompletedBytes() - { - if (!closed) { - updateCompletedBytes(); - } - return completedBytes; - } - - @Override - public long getReadTimeNanos() - { - return 0; - } - - private void updateCompletedBytes() - { - try { - long newCompletedBytes = (long) (totalBytes * recordReader.getProgress()); - completedBytes = min(totalBytes, max(completedBytes, newCompletedBytes)); - } - catch (IOException ignored) { - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - @Override - public Type getType(int field) - { - return types[field]; - } - - @Override - public boolean advanceNextPosition() - { - try { - // reset null flags - Arrays.fill(nulls, true); - - if (closed || !recordReader.nextKeyValue()) { - close(); - return false; - } - - return true; - } - catch (IOException | RuntimeException | InterruptedException e) { - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } - - closeWithSuppression(this, e); - throw new PrestoException(HIVE_CURSOR_ERROR, e); - } - } - - @Override - public boolean getBoolean(int fieldId) - { - 
checkState(!closed, "Cursor is closed"); - - validateType(fieldId, boolean.class); - return booleans[fieldId]; - } - - @Override - public long getLong(int fieldId) - { - checkState(!closed, "Cursor is closed"); - - validateType(fieldId, long.class); - return longs[fieldId]; - } - - @Override - public double getDouble(int fieldId) - { - checkState(!closed, "Cursor is closed"); - - validateType(fieldId, double.class); - return doubles[fieldId]; - } - - @Override - public Slice getSlice(int fieldId) - { - checkState(!closed, "Cursor is closed"); - - validateType(fieldId, Slice.class); - return slices[fieldId]; - } - - @Override - public Object getObject(int fieldId) - { - checkState(!closed, "Cursor is closed"); - - validateType(fieldId, Block.class); - return objects[fieldId]; - } - - @Override - public boolean isNull(int fieldId) - { - checkState(!closed, "Cursor is closed"); - return nulls[fieldId]; - } - - private void validateType(int fieldId, Class javaType) - { - if (types[fieldId].getJavaType() != javaType) { - // we don't use Preconditions.checkArgument because it requires boxing fieldId, which affects inner loop performance - throw new IllegalArgumentException(format("Expected field to be %s, actual %s (field %s)", javaType.getName(), types[fieldId].getJavaType().getName(), fieldId)); - } - } - - @Override - public void close() - { - // some hive input formats are broken and bad things can happen if you close them multiple times - if (closed) { - return; - } - closed = true; - - updateCompletedBytes(); - - try { - recordReader.close(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private ParquetRecordReader createParquetRecordReader( - HdfsEnvironment hdfsEnvironment, - String sessionUser, - Configuration configuration, - Path path, - long start, - long length, - long fileSize, - List columns, - boolean useParquetColumnNames, - boolean predicatePushdownEnabled, - TupleDomain effectivePredicate) - { - ParquetDataSource 
dataSource = null; - try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration); - dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length, fileSize, stats); - ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER)); - List blocks = parquetMetadata.getBlocks(); - FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - MessageType fileSchema = fileMetaData.getSchema(); - - PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema); - - List fields = columns.stream() - .filter(column -> column.getColumnType() == REGULAR) - .map(column -> getParquetType(column, fileSchema, useParquetColumnNames)) - .filter(Objects::nonNull) - .collect(toList()); - - MessageType requestedSchema = new MessageType(fileSchema.getName(), fields); - - LongArrayList offsets = new LongArrayList(blocks.size()); - for (BlockMetaData block : blocks) { - long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); - if (firstDataPage >= start && firstDataPage < start + length) { - if (predicatePushdownEnabled) { - Map, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema); - TupleDomain parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate); - ParquetPredicate parquetPredicate = buildParquetPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath); - if (predicateMatches(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain)) { - offsets.add(block.getStartingPos()); - } - } - else { - offsets.add(block.getStartingPos()); - } - } - } - - ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray()); - - TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID()); - - return hdfsEnvironment.doAs(sessionUser, () -> { - 
ParquetRecordReader realReader = new PrestoParquetRecordReader(readSupport); - realReader.initialize(split, taskContext); - return realReader; - }); - } - catch (Exception e) { - throwIfInstanceOf(e, PrestoException.class); - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage()); - if (e.getClass().getSimpleName().equals("BlockMissingException")) { - throw new PrestoException(HIVE_MISSING_DATA, message, e); - } - throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e); - } - finally { - if (dataSource != null) { - try { - dataSource.close(); - } - catch (IOException ignored) { - } - } - } - } - - public class PrestoParquetRecordReader - extends ParquetRecordReader - { - public PrestoParquetRecordReader(PrestoReadSupport readSupport) - { - super(readSupport); - } - } - - public final class PrestoReadSupport - extends ReadSupport - { - private final boolean useParquetColumnNames; - private final List columns; - private final List converters; - - public PrestoReadSupport(boolean useParquetColumnNames, List columns, MessageType messageType) - { - this.columns = columns; - this.useParquetColumnNames = useParquetColumnNames; - - ImmutableList.Builder converters = ImmutableList.builder(); - for (int i = 0; i < columns.size(); i++) { - HiveColumnHandle column = columns.get(i); - if (column.getColumnType() == REGULAR) { - parquet.schema.Type parquetType = getParquetType(column, messageType, useParquetColumnNames); - if (parquetType == null) { - continue; - } - if (parquetType.isPrimitive()) { - Optional decimalType = getDecimalType(column.getHiveType()); - if (decimalType.isPresent()) { - converters.add(new ParquetDecimalColumnConverter(i, decimalType.get())); - } - else { - converters.add(new ParquetPrimitiveColumnConverter(i)); - } - } - else { - converters.add(new 
ParquetColumnConverter(createGroupConverter(types[i], parquetType.getName(), parquetType, i), i)); - } - } - } - this.converters = converters.build(); - } - - @Override - @SuppressWarnings("deprecation") - public ReadContext init( - Configuration configuration, - Map keyValueMetaData, - MessageType messageType) - { - List fields = columns.stream() - .filter(column -> column.getColumnType() == REGULAR) - .map(column -> getParquetType(column, messageType, useParquetColumnNames)) - .filter(Objects::nonNull) - .collect(toList()); - MessageType requestedProjection = new MessageType(messageType.getName(), fields); - return new ReadContext(requestedProjection); - } - - @Override - public RecordMaterializer prepareForRead( - Configuration configuration, - Map keyValueMetaData, - MessageType fileSchema, - ReadContext readContext) - { - return new ParquetRecordConverter(converters); - } - } - - private static class ParquetRecordConverter - extends RecordMaterializer - { - private final ParquetGroupConverter groupConverter; - - public ParquetRecordConverter(List converters) - { - groupConverter = new ParquetGroupConverter(converters); - } - - @Override - public FakeParquetRecord getCurrentRecord() - { - // Parquet skips the record if it is null, so we need non-null record - return FakeParquetRecord.MATERIALIZE_RECORD; - } - - @Override - public GroupConverter getRootConverter() - { - return groupConverter; - } - } - - private enum FakeParquetRecord - { - MATERIALIZE_RECORD - } - - public static class ParquetGroupConverter - extends GroupConverter - { - private final List converters; - - public ParquetGroupConverter(List converters) - { - this.converters = converters; - } - - @Override - public Converter getConverter(int fieldIndex) - { - return converters.get(fieldIndex); - } - - @Override - public void start() - { - } - - @Override - public void end() - { - } - } - - @SuppressWarnings("AccessingNonPublicFieldOfAnotherObject") - private class ParquetPrimitiveColumnConverter - 
extends PrimitiveConverter - { - private final int fieldIndex; - - private ParquetPrimitiveColumnConverter(int fieldIndex) - { - this.fieldIndex = fieldIndex; - } - - @Override - public boolean isPrimitive() - { - return true; - } - - @Override - public PrimitiveConverter asPrimitiveConverter() - { - return this; - } - - @Override - public boolean hasDictionarySupport() - { - return false; - } - - @Override - public void setDictionary(Dictionary dictionary) - { - } - - @Override - public void addValueFromDictionary(int dictionaryId) - { - } - - @Override - public void addBoolean(boolean value) - { - nulls[fieldIndex] = false; - booleans[fieldIndex] = value; - } - - @Override - public void addDouble(double value) - { - nulls[fieldIndex] = false; - doubles[fieldIndex] = value; - } - - @Override - public void addLong(long value) - { - nulls[fieldIndex] = false; - longs[fieldIndex] = value; - } - - @Override - public void addBinary(Binary value) - { - nulls[fieldIndex] = false; - Type type = types[fieldIndex]; - if (type == TIMESTAMP) { - longs[fieldIndex] = ParquetTimestampUtils.getTimestampMillis(value); - } - else if (isVarcharType(type)) { - slices[fieldIndex] = truncateToLength(wrappedBuffer(value.getBytes()), type); - } - else if (isCharType(type)) { - slices[fieldIndex] = truncateToLengthAndTrimSpaces(wrappedBuffer(value.getBytes()), type); - } - else { - slices[fieldIndex] = wrappedBuffer(value.getBytes()); - } - } - - @Override - public void addFloat(float value) - { - nulls[fieldIndex] = false; - longs[fieldIndex] = floatToRawIntBits(value); - } - - @Override - public void addInt(int value) - { - nulls[fieldIndex] = false; - longs[fieldIndex] = value; - } - } - - // todo: support for other types of decimal storage (see https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md) - private class ParquetDecimalColumnConverter - extends PrimitiveConverter - { - private final int fieldIndex; - private final DecimalType decimalType; - - private 
ParquetDecimalColumnConverter(int fieldIndex, DecimalType decimalType) - { - this.fieldIndex = fieldIndex; - this.decimalType = requireNonNull(decimalType, "decimalType is null"); - } - - public void addBinary(Binary value) - { - nulls[fieldIndex] = false; - if (decimalType.isShort()) { - longs[fieldIndex] = DecimalUtils.getShortDecimalValue(value.getBytes()); - } - else { - slices[fieldIndex] = Decimals.encodeUnscaledValue(new BigInteger(value.getBytes())); - } - } - } - - public class ParquetColumnConverter - extends GroupConverter - { - private final GroupedConverter groupedConverter; - private final int fieldIndex; - - public ParquetColumnConverter(GroupedConverter groupedConverter, int fieldIndex) - { - this.groupedConverter = groupedConverter; - this.fieldIndex = fieldIndex; - } - - @Override - public Converter getConverter(int fieldIndex) - { - return groupedConverter.getConverter(fieldIndex); - } - - @Override - public void start() - { - groupedConverter.beforeValue(null); - groupedConverter.start(); - } - - @Override - public void end() - { - groupedConverter.afterValue(); - groupedConverter.end(); - - nulls[fieldIndex] = false; - - objects[fieldIndex] = groupedConverter.getBlock(); - } - } - - private interface BlockConverter - { - void beforeValue(BlockBuilder builder); - - void afterValue(); - } - - private abstract static class GroupedConverter - extends GroupConverter - implements BlockConverter - { - public abstract Block getBlock(); - } - - private static BlockConverter createConverter(Type prestoType, String columnName, parquet.schema.Type parquetType, int fieldIndex) - { - if (parquetType.isPrimitive()) { - if (parquetType.getOriginalType() == DECIMAL) { - DecimalMetadata decimalMetadata = ((PrimitiveType) parquetType).getDecimalMetadata(); - return new ParquetDecimalConverter(createDecimalType(decimalMetadata.getPrecision(), decimalMetadata.getScale())); - } - else { - return new ParquetPrimitiveConverter(prestoType, fieldIndex); - } - } - - 
return createGroupConverter(prestoType, columnName, parquetType, fieldIndex); - } - - private static GroupedConverter createGroupConverter(Type prestoType, String columnName, parquet.schema.Type parquetType, int fieldIndex) - { - GroupType groupType = parquetType.asGroupType(); - switch (prestoType.getTypeSignature().getBase()) { - case ARRAY: - return new ParquetListConverter(prestoType, columnName, groupType, fieldIndex); - case MAP: - return new ParquetMapConverter(prestoType, columnName, groupType, fieldIndex); - case ROW: - return new ParquetStructConverter(prestoType, columnName, groupType, fieldIndex); - default: - throw new IllegalArgumentException("Column " + columnName + " type " + parquetType.getOriginalType() + " not supported"); - } - } - - private static class ParquetStructConverter - extends GroupedConverter - { - private static final int NULL_BUILDER_POSITIONS_THRESHOLD = 100; - private static final int NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD = 32768; - - private final Type rowType; - private final int fieldIndex; - - private final List converters; - private BlockBuilder builder; - private BlockBuilder nullBuilder; // used internally when builder is set to null - private BlockBuilder currentEntryBuilder; - - public ParquetStructConverter(Type prestoType, String columnName, GroupType entryType, int fieldIndex) - { - checkArgument(ROW.equals(prestoType.getTypeSignature().getBase())); - List prestoTypeParameters = prestoType.getTypeParameters(); - List fieldTypes = entryType.getFields(); - checkArgument( - prestoTypeParameters.size() == fieldTypes.size(), - "Schema mismatch, metastore schema for row column %s has %s fields but parquet schema has %s fields", - columnName, - prestoTypeParameters.size(), - fieldTypes.size()); - - this.rowType = prestoType; - this.fieldIndex = fieldIndex; - - ImmutableList.Builder converters = ImmutableList.builder(); - for (int i = 0; i < prestoTypeParameters.size(); i++) { - parquet.schema.Type fieldType = 
fieldTypes.get(i); - converters.add(createConverter(prestoTypeParameters.get(i), columnName + "." + fieldType.getName(), fieldType, i)); - } - this.converters = converters.build(); - } - - @Override - public Converter getConverter(int fieldIndex) - { - return (Converter) converters.get(fieldIndex); - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = builder; - } - - @Override - public void start() - { - if (builder == null) { - if (nullBuilder == null || (nullBuilder.getPositionCount() >= NULL_BUILDER_POSITIONS_THRESHOLD && nullBuilder.getSizeInBytes() >= NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD)) { - nullBuilder = rowType.createBlockBuilder(null, NULL_BUILDER_POSITIONS_THRESHOLD); - } - currentEntryBuilder = nullBuilder.beginBlockEntry(); - } - else { - while (builder.getPositionCount() < fieldIndex) { - builder.appendNull(); - } - currentEntryBuilder = builder.beginBlockEntry(); - } - for (BlockConverter converter : converters) { - converter.beforeValue(currentEntryBuilder); - } - } - - @Override - public void end() - { - for (BlockConverter converter : converters) { - converter.afterValue(); - } - while (currentEntryBuilder.getPositionCount() < converters.size()) { - currentEntryBuilder.appendNull(); - } - - if (builder == null) { - nullBuilder.closeEntry(); - } - else { - builder.closeEntry(); - } - } - - @Override - public void afterValue() - { - } - - @Override - public Block getBlock() - { - checkState(builder == null && nullBuilder != null); // check that user requested a result block (builder == null), and the program followed the request (nullBuilder != null) - return nullBuilder.getObject(nullBuilder.getPositionCount() - 1, Block.class); - } - } - - private static class ParquetListConverter - extends GroupedConverter - { - private static final int NULL_BUILDER_POSITIONS_THRESHOLD = 100; - private static final int NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD = 32768; - - private final Type arrayType; - private final int fieldIndex; 
- - private final BlockConverter elementConverter; - private BlockBuilder builder; - private BlockBuilder nullBuilder; // used internally when builder is set to null - private BlockBuilder currentEntryBuilder; - - public ParquetListConverter(Type prestoType, String columnName, GroupType listType, int fieldIndex) - { - checkArgument( - listType.getFieldCount() == 1, - "Expected LIST column '%s' to only have one field, but has %s fields", - columnName, - listType.getFieldCount()); - checkArgument(ARRAY.equals(prestoType.getTypeSignature().getBase())); - - this.arrayType = prestoType; - this.fieldIndex = fieldIndex; - - // The Parquet specification requires that the element value of a - // LIST type be wrapped in an inner repeated group, like so: - // - // optional group listField (LIST) { - // repeated group list { - // optional int element - // } - // } - // - // However, some parquet libraries don't follow this spec. The - // compatibility rules used here are specified in the Parquet - // documentation at http://git.io/vOpNz. 
- parquet.schema.Type elementType = listType.getType(0); - if (isElementType(elementType, listType.getName())) { - elementConverter = createConverter(prestoType.getTypeParameters().get(0), columnName + ".element", elementType, 0); - } - else { - elementConverter = new ParquetListEntryConverter(prestoType.getTypeParameters().get(0), columnName, elementType.asGroupType()); - } - } - - //copied over from Apache Hive - private boolean isElementType(parquet.schema.Type repeatedType, String parentName) - { - if (repeatedType.isPrimitive() || - (repeatedType.asGroupType().getFieldCount() > 1)) { - return true; - } - - if (repeatedType.getName().equals("array")) { - return true; // existing avro data - } - - if (repeatedType.getName().equals(parentName + "_tuple")) { - return true; // existing thrift data - } - // false for the following cases: - // * name is "list", which matches the spec - // * name is "bag", which indicates existing hive or pig data - // * ambiguous case, which should be assumed is 3-level according to spec - return false; - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = builder; - } - - @Override - public Converter getConverter(int fieldIndex) - { - if (fieldIndex == 0) { - return (Converter) elementConverter; - } - throw new IllegalArgumentException("LIST field must be 0 not " + fieldIndex); - } - - @Override - public void start() - { - if (builder == null) { - if (nullBuilder == null || (nullBuilder.getPositionCount() >= NULL_BUILDER_POSITIONS_THRESHOLD && nullBuilder.getSizeInBytes() >= NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD)) { - nullBuilder = arrayType.createBlockBuilder(null, NULL_BUILDER_POSITIONS_THRESHOLD); - } - currentEntryBuilder = nullBuilder.beginBlockEntry(); - } - else { - while (builder.getPositionCount() < fieldIndex) { - builder.appendNull(); - } - currentEntryBuilder = builder.beginBlockEntry(); - } - elementConverter.beforeValue(currentEntryBuilder); - } - - @Override - public void end() - { - 
elementConverter.afterValue(); - - if (builder == null) { - nullBuilder.closeEntry(); - } - else { - builder.closeEntry(); - } - } - - @Override - public void afterValue() - { - } - - @Override - public Block getBlock() - { - checkState(builder == null && nullBuilder != null); // check that user requested a result block (builder == null), and the program followed the request (nullBuilder != null) - return nullBuilder.getObject(nullBuilder.getPositionCount() - 1, Block.class); - } - } - - private static class ParquetListEntryConverter - extends GroupConverter - implements BlockConverter - { - private final BlockConverter elementConverter; - - private BlockBuilder builder; - private int startingPosition; - - public ParquetListEntryConverter(Type prestoType, String columnName, GroupType elementType) - { - checkArgument( - elementType.getOriginalType() == null, - "Expected LIST column '%s' field to be type STRUCT, but is %s", - columnName, - elementType); - - checkArgument( - elementType.getFieldCount() == 1, - "Expected LIST column '%s' element to have one field, but has %s fields", - columnName, - elementType.getFieldCount()); - - elementConverter = createConverter(prestoType, columnName + ".element", elementType.getType(0), 0); - } - - @Override - public Converter getConverter(int fieldIndex) - { - if (fieldIndex == 0) { - return (Converter) elementConverter; - } - throw new IllegalArgumentException("LIST entry field must be 0 not " + fieldIndex); - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = builder; - } - - @Override - public void start() - { - elementConverter.beforeValue(builder); - startingPosition = builder.getPositionCount(); - } - - @Override - public void end() - { - elementConverter.afterValue(); - // we have read nothing, this means there is a null element in this list - if (builder.getPositionCount() == startingPosition) { - builder.appendNull(); - } - } - - @Override - public void afterValue() - { - } - } - - 
private static class ParquetMapConverter - extends GroupedConverter - { - private static final int NULL_BUILDER_POSITIONS_THRESHOLD = 100; - private static final int NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD = 32768; - - private final Type mapType; - private final int fieldIndex; - - private final ParquetMapEntryConverter entryConverter; - private BlockBuilder builder; - private BlockBuilder nullBuilder; // used internally when builder is set to null - private BlockBuilder currentEntryBuilder; - - public ParquetMapConverter(Type type, String columnName, GroupType mapType, int fieldIndex) - { - checkArgument( - mapType.getFieldCount() == 1, - "Expected MAP column '%s' to only have one field, but has %s fields", - mapType.getName(), - mapType.getFieldCount()); - - this.mapType = type; - this.fieldIndex = fieldIndex; - - parquet.schema.Type entryType = mapType.getFields().get(0); - - entryConverter = new ParquetMapEntryConverter(type, columnName + ".entry", entryType.asGroupType()); - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = builder; - } - - @Override - public Converter getConverter(int fieldIndex) - { - if (fieldIndex == 0) { - return entryConverter; - } - throw new IllegalArgumentException("Map field must be 0 not " + fieldIndex); - } - - @Override - public void start() - { - if (builder == null) { - if (nullBuilder == null || (nullBuilder.getPositionCount() >= NULL_BUILDER_POSITIONS_THRESHOLD && nullBuilder.getSizeInBytes() >= NULL_BUILDER_SIZE_IN_BYTES_THRESHOLD)) { - nullBuilder = mapType.createBlockBuilder(null, NULL_BUILDER_POSITIONS_THRESHOLD); - } - currentEntryBuilder = nullBuilder.beginBlockEntry(); - } - else { - while (builder.getPositionCount() < fieldIndex) { - builder.appendNull(); - } - currentEntryBuilder = builder.beginBlockEntry(); - } - entryConverter.beforeValue(currentEntryBuilder); - } - - @Override - public void end() - { - entryConverter.afterValue(); - - if (builder == null) { - 
nullBuilder.closeEntry(); - } - else { - builder.closeEntry(); - } - } - - @Override - public void afterValue() - { - } - - @Override - public Block getBlock() - { - checkState(builder == null && nullBuilder != null); // check that user requested a result block (builder == null), and the program followed the request (nullBuilder != null) - return nullBuilder.getObject(nullBuilder.getPositionCount() - 1, Block.class); - } - } - - private static class ParquetMapEntryConverter - extends GroupConverter - implements BlockConverter - { - private final BlockConverter keyConverter; - private final BlockConverter valueConverter; - - private BlockBuilder builder; - - public ParquetMapEntryConverter(Type prestoType, String columnName, GroupType entryType) - { - checkArgument(MAP.equals(prestoType.getTypeSignature().getBase())); - // original version of parquet used null for entry due to a bug - if (entryType.getOriginalType() != null) { - checkArgument( - entryType.getOriginalType() == MAP_KEY_VALUE, - "Expected MAP column '%s' field to be type %s, but is %s", - columnName, - MAP_KEY_VALUE, - entryType); - } - - GroupType entryGroupType = entryType.asGroupType(); - checkArgument( - entryGroupType.getFieldCount() == 2, - "Expected MAP column '%s' entry to have two fields, but has %s fields", - columnName, - entryGroupType.getFieldCount()); - checkArgument( - entryGroupType.getFieldName(0).equals("key"), - "Expected MAP column '%s' entry field 0 to be named 'key', but is named %s", - columnName, - entryGroupType.getFieldName(0)); - checkArgument( - entryGroupType.getFieldName(1).equals("value"), - "Expected MAP column '%s' entry field 1 to be named 'value', but is named %s", - columnName, - entryGroupType.getFieldName(1)); - checkArgument( - entryGroupType.getType(0).isPrimitive(), - "Expected MAP column '%s' entry field 0 to be primitive, but is %s", - columnName, - entryGroupType.getType(0)); - - keyConverter = createConverter(prestoType.getTypeParameters().get(0), columnName 
+ ".key", entryGroupType.getFields().get(0), 0); - valueConverter = createConverter(prestoType.getTypeParameters().get(1), columnName + ".value", entryGroupType.getFields().get(1), 1); - } - - @Override - public Converter getConverter(int fieldIndex) - { - if (fieldIndex == 0) { - return (Converter) keyConverter; - } - if (fieldIndex == 1) { - return (Converter) valueConverter; - } - throw new IllegalArgumentException("Map entry field must be 0 or 1 not " + fieldIndex); - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = builder; - } - - @Override - public void start() - { - keyConverter.beforeValue(builder); - valueConverter.beforeValue(builder); - } - - @Override - public void end() - { - keyConverter.afterValue(); - valueConverter.afterValue(); - // handle the case where we have a key, but the value is null - // null keys are not supported anyway, so we can ignore that case here - if (builder.getPositionCount() % 2 != 0) { - builder.appendNull(); - } - } - - @Override - public void afterValue() - { - } - } - - private static class ParquetPrimitiveConverter - extends PrimitiveConverter - implements BlockConverter - { - private final Type type; - private final int fieldIndex; - private BlockBuilder builder; - - public ParquetPrimitiveConverter(Type type, int fieldIndex) - { - this.type = type; - this.fieldIndex = fieldIndex; - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = requireNonNull(builder, "parent builder is null"); - } - - @Override - public void afterValue() - { - } - - private void addMissingValues() - { - while (builder.getPositionCount() < fieldIndex) { - builder.appendNull(); - } - } - - @Override - public boolean isPrimitive() - { - return true; - } - - @Override - public PrimitiveConverter asPrimitiveConverter() - { - return this; - } - - @Override - public boolean hasDictionarySupport() - { - return false; - } - - @Override - public void setDictionary(Dictionary dictionary) - 
{ - } - - @Override - public void addValueFromDictionary(int dictionaryId) - { - } - - @Override - public void addBoolean(boolean value) - { - addMissingValues(); - type.writeBoolean(builder, value); - } - - @Override - public void addDouble(double value) - { - addMissingValues(); - type.writeDouble(builder, value); - } - - @Override - public void addLong(long value) - { - addMissingValues(); - type.writeLong(builder, value); - } - - @Override - public void addBinary(Binary value) - { - addMissingValues(); - if (type == TIMESTAMP) { - type.writeLong(builder, ParquetTimestampUtils.getTimestampMillis(value)); - } - else if (isVarcharType(type)) { - type.writeSlice(builder, truncateToLength(wrappedBuffer(value.getBytes()), type)); - } - else if (isCharType(type)) { - type.writeSlice(builder, truncateToLengthAndTrimSpaces(wrappedBuffer(value.getBytes()), type)); - } - else { - type.writeSlice(builder, wrappedBuffer(value.getBytes())); - } - } - - @Override - public void addFloat(float value) - { - addMissingValues(); - type.writeLong(builder, floatToRawIntBits(value)); - } - - @Override - public void addInt(int value) - { - addMissingValues(); - type.writeLong(builder, value); - } - } - - private static class ParquetDecimalConverter - extends PrimitiveConverter - implements BlockConverter - { - private final DecimalType decimalType; - private BlockBuilder builder; - private boolean wroteValue; - - public ParquetDecimalConverter(DecimalType decimalType) - { - this.decimalType = requireNonNull(decimalType, "decimalType is null"); - } - - @Override - public void beforeValue(BlockBuilder builder) - { - this.builder = requireNonNull(builder, "parent builder is null"); - wroteValue = false; - } - - @Override - public void afterValue() - { - if (wroteValue) { - return; - } - - builder.appendNull(); - } - - @Override - public boolean isPrimitive() - { - return true; - } - - @Override - public PrimitiveConverter asPrimitiveConverter() - { - return this; - } - - @Override - 
public boolean hasDictionarySupport() - { - return false; - } - - @Override - public void addBinary(Binary value) - { - if (decimalType.isShort()) { - decimalType.writeLong(builder, DecimalUtils.getShortDecimalValue(value.getBytes())); - } - else { - BigInteger unboundedDecimal = new BigInteger(value.getBytes()); - decimalType.writeSlice(builder, Decimals.encodeUnscaledValue(unboundedDecimal)); - } - wroteValue = true; - } - } -} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSource.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSource.java index 690b03dc26c2c..7beeaa1f3c159 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSource.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSource.java @@ -14,7 +14,9 @@ package com.facebook.presto.hive.parquet; import com.facebook.presto.hive.HiveColumnHandle; -import com.facebook.presto.hive.parquet.reader.ParquetReader; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.ParquetCorruptionException; +import com.facebook.presto.parquet.reader.ParquetReader; import com.facebook.presto.spi.ConnectorPageSource; import com.facebook.presto.spi.Page; import com.facebook.presto.spi.PrestoException; @@ -38,9 +40,9 @@ import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA; import static com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getFieldIndex; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.lookupColumnByName; +import static com.facebook.presto.hive.parquet.ParquetPageSourceFactory.getParquetType; +import static com.facebook.presto.parquet.ParquetTypeUtils.getFieldIndex; +import static 
com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName; import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; import static parquet.io.ColumnIOConverter.constructField; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java index 5dd5a33f081be..479221ce449b2 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPageSourceFactory.java @@ -17,15 +17,20 @@ import com.facebook.presto.hive.HdfsEnvironment; import com.facebook.presto.hive.HiveColumnHandle; import com.facebook.presto.hive.HivePageSourceFactory; -import com.facebook.presto.hive.parquet.predicate.ParquetPredicate; -import com.facebook.presto.hive.parquet.reader.ParquetMetadataReader; -import com.facebook.presto.hive.parquet.reader.ParquetReader; import com.facebook.presto.memory.context.AggregatedMemoryContext; +import com.facebook.presto.parquet.ParquetDataSource; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.predicate.Predicate; +import com.facebook.presto.parquet.reader.MetadataReader; +import com.facebook.presto.parquet.reader.ParquetReader; import com.facebook.presto.spi.ConnectorPageSource; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.TypeManager; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -46,6 +51,7 @@ import java.util.ArrayList; import java.util.List; 
import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Properties; @@ -54,22 +60,20 @@ import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT; import static com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA; -import static com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled; -import static com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled; import static com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames; import static com.facebook.presto.hive.HiveUtil.getDeserializerClassName; import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getColumnIO; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getDescriptors; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.getParquetTupleDomain; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches; import static com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; +import static com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO; +import static com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors; +import static com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName; +import static com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate; +import static com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches; import static com.google.common.base.Strings.nullToEmpty; import static java.lang.String.format; import static 
java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE; public class ParquetPageSourceFactory implements HivePageSourceFactory @@ -104,10 +108,6 @@ public Optional createPageSource( TupleDomain effectivePredicate, DateTimeZone hiveStorageTimeZone) { - if (!isParquetOptimizedReaderEnabled(session)) { - return Optional.empty(); - } - if (!PARQUET_SERDE_CLASS_NAMES.contains(getDeserializerClassName(schema))) { return Optional.empty(); } @@ -124,7 +124,6 @@ public Optional createPageSource( columns, isUseParquetColumnNames(session), typeManager, - isParquetPredicatePushdownEnabled(session), effectivePredicate, stats)); } @@ -141,7 +140,6 @@ public static ParquetPageSource createParquetPageSource( List columns, boolean useParquetColumnNames, TypeManager typeManager, - boolean predicatePushdownEnabled, TupleDomain effectivePredicate, FileFormatDataSourceStats stats) { @@ -151,7 +149,7 @@ public static ParquetPageSource createParquetPageSource( try { FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration); FSDataInputStream inputStream = fileSystem.open(path); - ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(inputStream, path, fileSize); + ParquetMetadata parquetMetadata = MetadataReader.readFooter(inputStream, path, fileSize); FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); MessageType fileSchema = fileMetaData.getSchema(); dataSource = buildHdfsParquetDataSource(inputStream, path, fileSize, stats); @@ -172,15 +170,13 @@ public static ParquetPageSource createParquetPageSource( } } - if (predicatePushdownEnabled) { - Map, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema); - TupleDomain parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate); - ParquetPredicate parquetPredicate = buildParquetPredicate(requestedSchema, 
parquetTupleDomain, descriptorsByPath); - final ParquetDataSource finalDataSource = dataSource; - blocks = blocks.stream() - .filter(block -> predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain)) - .collect(toList()); - } + Map, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema); + TupleDomain parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate); + Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath); + final ParquetDataSource finalDataSource = dataSource; + blocks = blocks.stream() + .filter(block -> predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain)) + .collect(toList()); MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema); ParquetReader parquetReader = new ParquetReader( messageColumnIO, @@ -220,4 +216,38 @@ public static ParquetPageSource createParquetPageSource( throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e); } } + + public static TupleDomain getParquetTupleDomain(Map, RichColumnDescriptor> descriptorsByPath, TupleDomain effectivePredicate) + { + if (effectivePredicate.isNone()) { + return TupleDomain.none(); + } + + ImmutableMap.Builder predicate = ImmutableMap.builder(); + for (Entry entry : effectivePredicate.getDomains().get().entrySet()) { + HiveColumnHandle columnHandle = entry.getKey(); + // skip looking up predicates for complex types as Parquet only stores stats for primitives + if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) { + continue; + } + + RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); + if (descriptor != null) { + predicate.put(descriptor, entry.getValue()); + } + } + return TupleDomain.withColumnDomains(predicate.build()); + } + + public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean 
useParquetColumnNames) + { + if (useParquetColumnNames) { + return getParquetTypeByName(column.getName(), messageType); + } + + if (column.getHiveColumnIndex() < messageType.getFieldCount()) { + return messageType.getType(column.getHiveColumnIndex()); + } + return null; + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java deleted file mode 100644 index d39f7f16779e0..0000000000000 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetRecordCursorProvider.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.facebook.presto.hive.parquet; - -import com.facebook.presto.hive.FileFormatDataSourceStats; -import com.facebook.presto.hive.HdfsEnvironment; -import com.facebook.presto.hive.HiveColumnHandle; -import com.facebook.presto.hive.HiveRecordCursorProvider; -import com.facebook.presto.spi.ConnectorSession; -import com.facebook.presto.spi.RecordCursor; -import com.facebook.presto.spi.predicate.TupleDomain; -import com.facebook.presto.spi.type.TypeManager; -import com.google.common.collect.ImmutableSet; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.joda.time.DateTimeZone; - -import javax.inject.Inject; - -import java.util.List; -import java.util.Optional; -import java.util.Properties; -import java.util.Set; - -import static com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled; -import static com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames; -import static com.facebook.presto.hive.HiveUtil.getDeserializerClassName; -import static java.util.Objects.requireNonNull; - -public class ParquetRecordCursorProvider - implements HiveRecordCursorProvider -{ - private static final Set PARQUET_SERDE_CLASS_NAMES = ImmutableSet.builder() - .add("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe") - .add("parquet.hive.serde.ParquetHiveSerDe") - .build(); - - private final HdfsEnvironment hdfsEnvironment; - private final FileFormatDataSourceStats stats; - - @Inject - public ParquetRecordCursorProvider(HdfsEnvironment hdfsEnvironment, FileFormatDataSourceStats stats) - { - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public Optional createRecordCursor( - Configuration configuration, - ConnectorSession session, - Path path, - long start, - long length, - long fileSize, - Properties schema, - List columns, - TupleDomain effectivePredicate, - DateTimeZone 
hiveStorageTimeZone, - TypeManager typeManager) - { - if (!PARQUET_SERDE_CLASS_NAMES.contains(getDeserializerClassName(schema))) { - return Optional.empty(); - } - - return Optional.of(new ParquetHiveRecordCursor( - hdfsEnvironment, - session.getUser(), - configuration, - path, - start, - length, - fileSize, - schema, - columns, - isUseParquetColumnNames(session), - typeManager, - isParquetPredicatePushdownEnabled(session), - effectivePredicate, - stats)); - } -} diff --git a/presto-hive/src/main/java/parquet/io/ColumnIOConverter.java b/presto-hive/src/main/java/parquet/io/ColumnIOConverter.java index ca05ba94e8b1c..d1bb50f57de79 100644 --- a/presto-hive/src/main/java/parquet/io/ColumnIOConverter.java +++ b/presto-hive/src/main/java/parquet/io/ColumnIOConverter.java @@ -13,10 +13,10 @@ */ package parquet.io; -import com.facebook.presto.hive.parquet.Field; -import com.facebook.presto.hive.parquet.GroupField; -import com.facebook.presto.hive.parquet.PrimitiveField; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.GroupField; +import com.facebook.presto.parquet.PrimitiveField; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.type.MapType; import com.facebook.presto.spi.type.NamedTypeSignature; import com.facebook.presto.spi.type.Type; @@ -27,9 +27,9 @@ import java.util.Locale; import java.util.Optional; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getArrayElementColumn; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getMapKeyValueColumn; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.lookupColumnByName; +import static com.facebook.presto.parquet.ParquetTypeUtils.getArrayElementColumn; +import static com.facebook.presto.parquet.ParquetTypeUtils.getMapKeyValueColumn; +import static com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName; import static 
com.facebook.presto.spi.type.StandardTypes.ARRAY; import static com.facebook.presto.spi.type.StandardTypes.MAP; import static com.facebook.presto.spi.type.StandardTypes.ROW; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java index 5abe1c649d33a..5f844b6a50d80 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java @@ -35,7 +35,6 @@ import com.facebook.presto.hive.metastore.thrift.TestingHiveCluster; import com.facebook.presto.hive.metastore.thrift.ThriftHiveMetastore; import com.facebook.presto.hive.orc.OrcPageSource; -import com.facebook.presto.hive.parquet.ParquetHiveRecordCursor; import com.facebook.presto.hive.parquet.ParquetPageSource; import com.facebook.presto.hive.rcfile.RcFilePageSource; import com.facebook.presto.metadata.MetadataManager; @@ -4108,10 +4107,6 @@ protected static void assertPageSourceType(ConnectorPageSource pageSource, HiveS private static Class recordCursorType(HiveStorageFormat hiveStorageFormat) { - switch (hiveStorageFormat) { - case PARQUET: - return ParquetHiveRecordCursor.class; - } return GenericHiveRecordCursor.class; } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java index f7d5712a140dc..0efc3f12fe53e 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/HiveTestUtils.java @@ -19,7 +19,6 @@ import com.facebook.presto.hive.orc.DwrfPageSourceFactory; import com.facebook.presto.hive.orc.OrcPageSourceFactory; import com.facebook.presto.hive.parquet.ParquetPageSourceFactory; -import com.facebook.presto.hive.parquet.ParquetRecordCursorProvider; import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory; 
import com.facebook.presto.hive.s3.HiveS3Config; import com.facebook.presto.hive.s3.PrestoS3ConfigurationUpdater; @@ -82,7 +81,6 @@ public static Set getDefaultHiveRecordCursorProvider(H { HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig); return ImmutableSet.builder() - .add(new ParquetRecordCursorProvider(testHdfsEnvironment, new FileFormatDataSourceStats())) .add(new GenericHiveRecordCursorProvider(testHdfsEnvironment)) .build(); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java index 9f2f8c3b58fa7..889aab830ad91 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java @@ -81,8 +81,6 @@ public void testDefaults() .setTextMaxLineLength(new DataSize(100, Unit.MEGABYTE)) .setUseParquetColumnNames(false) .setUseOrcColumnNames(false) - .setParquetPredicatePushdownEnabled(true) - .setParquetOptimizedReaderEnabled(true) .setAssumeCanonicalPartitionKeys(false) .setOrcBloomFiltersEnabled(false) .setOrcDefaultBloomFilterFpp(0.05) @@ -159,8 +157,6 @@ public void testExplicitPropertyMappings() .put("hive.text.max-line-length", "13MB") .put("hive.parquet.use-column-names", "true") .put("hive.orc.use-column-names", "true") - .put("hive.parquet-predicate-pushdown.enabled", "false") - .put("hive.parquet-optimized-reader.enabled", "false") .put("hive.orc.bloom-filters.enabled", "true") .put("hive.orc.default-bloom-filter-fpp", "0.96") .put("hive.orc.max-merge-distance", "22kB") @@ -233,8 +229,6 @@ public void testExplicitPropertyMappings() .setTextMaxLineLength(new DataSize(13, Unit.MEGABYTE)) .setUseParquetColumnNames(true) .setUseOrcColumnNames(true) - .setParquetPredicatePushdownEnabled(false) - .setParquetOptimizedReaderEnabled(false) .setAssumeCanonicalPartitionKeys(true) .setOrcBloomFiltersEnabled(true) 
.setOrcDefaultBloomFilterFpp(0.96) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java index 86c816056f2e6..ae28f77635eab 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveFileFormats.java @@ -16,7 +16,6 @@ import com.facebook.presto.hive.orc.DwrfPageSourceFactory; import com.facebook.presto.hive.orc.OrcPageSourceFactory; import com.facebook.presto.hive.parquet.ParquetPageSourceFactory; -import com.facebook.presto.hive.parquet.ParquetRecordCursorProvider; import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory; import com.facebook.presto.orc.OrcWriterOptions; import com.facebook.presto.spi.ConnectorPageSource; @@ -25,8 +24,6 @@ import com.facebook.presto.spi.RecordCursor; import com.facebook.presto.spi.RecordPageSource; import com.facebook.presto.spi.predicate.TupleDomain; -import com.facebook.presto.spi.type.ArrayType; -import com.facebook.presto.spi.type.RowType; import com.facebook.presto.testing.TestingConnectorSession; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; @@ -36,7 +33,6 @@ import io.airlift.compress.lzo.LzoCodec; import io.airlift.compress.lzo.LzopCodec; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -74,10 +70,6 @@ import static com.facebook.presto.hive.HiveTestUtils.SESSION; import static com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER; import static com.facebook.presto.hive.HiveTestUtils.getTypes; -import static com.facebook.presto.spi.type.IntegerType.INTEGER; -import static com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType; 
-import static com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf; -import static com.facebook.presto.tests.StructuralTestUtil.rowBlockOf; import static com.google.common.base.Predicates.not; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.filter; @@ -87,11 +79,7 @@ import static java.util.stream.Collectors.toList; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB; -import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector; -import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector; -import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector; -import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; @@ -101,13 +89,8 @@ public class TestHiveFileFormats extends AbstractTestHiveFileFormats { private static final FileFormatDataSourceStats STATS = new FileFormatDataSourceStats(); - private static TestingConnectorSession parquetCursorSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, false, false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetCursorSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, false, true), new OrcFileWriterConfig(), new 
ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetCursorPushdownSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, true, false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetCursorPushdownSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false, true, true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetPageSourceSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, false, false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetPageSourceSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, false, true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); - private static TestingConnectorSession parquetPageSourcePushdown = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true, true, false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); + private static TestingConnectorSession parquetPageSourceSession = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); + private static TestingConnectorSession parquetPageSourceSessionUseName = new TestingConnectorSession(new HiveSessionProperties(createParquetHiveClientConfig(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); private static final DateTimeZone HIVE_STORAGE_TIME_ZONE = DateTimeZone.forID("Asia/Katmandu"); @@ -341,43 +324,6 @@ private static List 
getTestColumnsSupportedByAvro() .collect(toList()); } - @Test(dataProvider = "rowCount") - public void testParquet(int rowCount) - throws Exception - { - List testColumns = getTestColumnsSupportedByParquet(); - assertThatFileFormat(PARQUET) - .withColumns(testColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorSession) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withColumns(testColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorPushdownSession) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - } - - @Test(dataProvider = "rowCount") - public void testParquetCaseInsensitiveColumnLookup(int rowCount) - throws Exception - { - List writeColumns = ImmutableList.of(new TestColumn("column_name", javaStringObjectInspector, "test", utf8Slice("test"), false)); - List readColumns = ImmutableList.of(new TestColumn("Column_Name", javaStringObjectInspector, "test", utf8Slice("test"), false)); - assertThatFileFormat(PARQUET) - .withWriteColumns(writeColumns) - .withReadColumns(readColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorSessionUseName) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withWriteColumns(writeColumns) - .withReadColumns(readColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorPushdownSessionUseName) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - } - @Test(dataProvider = "rowCount") public void testParquetPageSource(int rowCount) throws Exception @@ -388,11 +334,6 @@ public void testParquetPageSource(int rowCount) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withColumns(testColumns) - .withSession(parquetPageSourcePushdown) - 
.withRowsCount(rowCount) - .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); } @Test(dataProvider = "rowCount") @@ -426,26 +367,6 @@ public void testParquetPageSourceSchemaEvolution(int rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); } - @Test(dataProvider = "rowCount") - public void testParquetUseColumnNames(int rowCount) - throws Exception - { - List writeColumns = getTestColumnsSupportedByParquet(); - List readColumns = Lists.reverse(writeColumns); - assertThatFileFormat(PARQUET) - .withWriteColumns(writeColumns) - .withReadColumns(readColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorSessionUseName) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withWriteColumns(writeColumns) - .withReadColumns(readColumns) - .withRowsCount(rowCount) - .withSession(parquetCursorPushdownSessionUseName) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - } - private static List getTestColumnsSupportedByParquet() { // Write of complex hive data to Parquet is broken @@ -461,43 +382,6 @@ private static List getTestColumnsSupportedByParquet() .collect(toList()); } - @Test(dataProvider = "rowCount") - public void testParquetThrift(int rowCount) - { - RowType nameType = RowType.anonymous(ImmutableList.of(createUnboundedVarcharType(), createUnboundedVarcharType())); - RowType phoneType = RowType.anonymous(ImmutableList.of(createUnboundedVarcharType(), createUnboundedVarcharType())); - RowType personType = RowType.anonymous(ImmutableList.of(nameType, INTEGER, createUnboundedVarcharType(), new ArrayType(phoneType))); - - List testColumns = ImmutableList.of( - new TestColumn( - "persons", - getStandardListObjectInspector( - getStandardStructObjectInspector( - ImmutableList.of("name", "id", "email", "phones"), - ImmutableList.of( - getStandardStructObjectInspector( - 
ImmutableList.of("first_name", "last_name"), - ImmutableList.of(javaStringObjectInspector, javaStringObjectInspector)), - javaIntObjectInspector, - javaStringObjectInspector, - getStandardListObjectInspector( - getStandardStructObjectInspector( - ImmutableList.of("number", "type"), - ImmutableList.of(javaStringObjectInspector, javaStringObjectInspector)))))), - null, - arrayBlockOf(personType, - rowBlockOf(ImmutableList.of(nameType, INTEGER, createUnboundedVarcharType(), new ArrayType(phoneType)), - rowBlockOf(ImmutableList.of(createUnboundedVarcharType(), createUnboundedVarcharType()), "Bob", "Roberts"), - 0, - "bob.roberts@example.com", - arrayBlockOf(phoneType, rowBlockOf(ImmutableList.of(createUnboundedVarcharType(), createUnboundedVarcharType()), "1234567890", null)))))); - - File file = new File(this.getClass().getClassLoader().getResource("addressbook.parquet").getPath()); - FileSplit split = new FileSplit(new Path(file.getAbsolutePath()), 0, file.length(), new String[0]); - HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS); - testCursorProvider(cursorProvider, split, PARQUET, testColumns, SESSION, 1); - } - @Test(dataProvider = "rowCount") public void testDwrf(int rowCount) throws Exception @@ -563,27 +447,11 @@ public void testTruncateVarcharColumn() .withReadColumns(ImmutableList.of(readColumn)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withWriteColumns(ImmutableList.of(writeColumn)) - .withReadColumns(ImmutableList.of(readColumn)) - .withSession(parquetCursorSession) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withWriteColumns(ImmutableList.of(writeColumn)) - .withReadColumns(ImmutableList.of(readColumn)) - .withSession(parquetCursorPushdownSession) - .isReadableByRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS)); - 
assertThatFileFormat(PARQUET) .withWriteColumns(ImmutableList.of(writeColumn)) .withReadColumns(ImmutableList.of(readColumn)) .withSession(parquetPageSourceSession) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); - assertThatFileFormat(PARQUET) - .withWriteColumns(ImmutableList.of(writeColumn)) - .withReadColumns(ImmutableList.of(readColumn)) - .withSession(parquetPageSourcePushdown) - .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); assertThatFileFormat(AVRO) .withWriteColumns(ImmutableList.of(writeColumn)) @@ -627,23 +495,10 @@ public void testFailForLongVarcharPartitionColumn() .withColumns(columns) .isFailingForPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage); - assertThatFileFormat(PARQUET) - .withColumns(columns) - .withSession(parquetCursorSession) - .isFailingForRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage); - assertThatFileFormat(PARQUET) - .withColumns(columns) - .withSession(parquetCursorPushdownSession) - .isFailingForRecordCursor(new ParquetRecordCursorProvider(HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage); - assertThatFileFormat(PARQUET) .withColumns(columns) .withSession(parquetPageSourceSession) .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage); - assertThatFileFormat(PARQUET) - .withColumns(columns) - .withSession(parquetPageSourcePushdown) - .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage); assertThatFileFormat(SEQUENCEFILE) .withColumns(columns) @@ -789,12 +644,10 @@ private FileFormatAssertion assertThatFileFormat(HiveStorageFormat hiveStorageFo .withStorageFormat(hiveStorageFormat); } - private static HiveClientConfig createParquetHiveClientConfig(boolean 
enableOptimizedReader, boolean enablePredicatePushDown, boolean useParquetColumnNames) + private static HiveClientConfig createParquetHiveClientConfig(boolean useParquetColumnNames) { HiveClientConfig config = new HiveClientConfig(); - config.setParquetOptimizedReaderEnabled(enableOptimizedReader) - .setParquetPredicatePushdownEnabled(enablePredicatePushDown) - .setUseParquetColumnNames(useParquetColumnNames); + config.setUseParquetColumnNames(useParquetColumnNames); return config; } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index 86fc3b2d4bec5..70d7b21c119f5 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -3070,9 +3070,6 @@ private List getAllTestingHiveStorageFormat() formats.add(new TestingHiveStorageFormat( Session.builder(session).setCatalogSessionProperty(session.getCatalog().get(), "orc_optimized_writer_enabled", "true").build(), HiveStorageFormat.DWRF)); - formats.add(new TestingHiveStorageFormat( - Session.builder(session).setCatalogSessionProperty(session.getCatalog().get(), "parquet_optimized_reader_enabled", "true").build(), - HiveStorageFormat.PARQUET)); return formats.build(); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java index 3c6125c90dd9c..c64fb4b7a5fd9 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/FileFormat.java @@ -30,7 +30,6 @@ import com.facebook.presto.hive.orc.DwrfPageSourceFactory; import com.facebook.presto.hive.orc.OrcPageSourceFactory; import com.facebook.presto.hive.parquet.ParquetPageSourceFactory; -import 
com.facebook.presto.hive.parquet.ParquetRecordCursorProvider; import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory; import com.facebook.presto.orc.OrcWriter; import com.facebook.presto.orc.OrcWriterOptions; @@ -298,8 +297,8 @@ public boolean supportsDate() @Override public ConnectorPageSource createFileFormatReader(ConnectorSession session, HdfsEnvironment hdfsEnvironment, File targetFile, List columnNames, List columnTypes) { - HiveRecordCursorProvider cursorProvider = new ParquetRecordCursorProvider(hdfsEnvironment, new FileFormatDataSourceStats()); - return createPageSource(cursorProvider, session, targetFile, columnNames, columnTypes, HiveStorageFormat.PARQUET); + HivePageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(TYPE_MANAGER, hdfsEnvironment, new FileFormatDataSourceStats()); + return createPageSource(pageSourceFactory, session, targetFile, columnNames, columnTypes, HiveStorageFormat.PARQUET); } @Override diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/HiveFileFormatBenchmark.java b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/HiveFileFormatBenchmark.java index ac9cf6ab6175f..50a7cc516b114 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/HiveFileFormatBenchmark.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/benchmark/HiveFileFormatBenchmark.java @@ -94,8 +94,7 @@ public class HiveFileFormatBenchmark } @SuppressWarnings("deprecation") - private static final HiveClientConfig CONFIG = new HiveClientConfig() - .setParquetOptimizedReaderEnabled(true); + private static final HiveClientConfig CONFIG = new HiveClientConfig(); private static final ConnectorSession SESSION = new TestingConnectorSession(new HiveSessionProperties(CONFIG, new OrcFileWriterConfig(), new ParquetFileWriterConfig()) .getSessionProperties()); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java 
b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java index cd621311d71a9..ff69635d1eee5 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/ParquetTester.java @@ -411,8 +411,6 @@ private static HiveClientConfig createHiveClientConfig(boolean useParquetColumnN { HiveClientConfig config = new HiveClientConfig(); config.setHiveStorageFormat(HiveStorageFormat.PARQUET) - .setParquetOptimizedReaderEnabled(OPTIMIZED) - .setParquetPredicatePushdownEnabled(OPTIMIZED) .setUseParquetColumnNames(useParquetColumnNames); return config; } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/predicate/TestParquetPredicateUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/predicate/TestParquetPredicateUtils.java index c6e507375aa08..f5baa47dfd630 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/predicate/TestParquetPredicateUtils.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/parquet/predicate/TestParquetPredicateUtils.java @@ -15,7 +15,7 @@ import com.facebook.presto.hive.HiveColumnHandle; import com.facebook.presto.hive.HiveType; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.ArrayType; @@ -39,9 +39,9 @@ import java.util.Set; import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getDescriptors; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.getParquetTupleDomain; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.isOnlyDictionaryEncodingPages; +import static 
com.facebook.presto.hive.parquet.ParquetPageSourceFactory.getParquetTupleDomain; +import static com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors; +import static com.facebook.presto.parquet.predicate.PredicateUtils.isOnlyDictionaryEncodingPages; import static com.facebook.presto.spi.block.MethodHandleUtil.methodHandle; import static com.facebook.presto.spi.predicate.TupleDomain.withColumnDomains; import static com.facebook.presto.spi.type.BigintType.BIGINT; diff --git a/presto-parquet/pom.xml b/presto-parquet/pom.xml new file mode 100644 index 0000000000000..d2b72c5b4ee65 --- /dev/null +++ b/presto-parquet/pom.xml @@ -0,0 +1,180 @@ + + + 4.0.0 + + + com.facebook.presto + presto-root + 0.213-SNAPSHOT + + + presto-parquet + presto-parquet + + + ${project.parent.basedir} + + + + + com.facebook.presto + presto-memory-context + + + + com.facebook.presto.hadoop + hadoop-apache2 + provided + + + + com.facebook.presto.hive + hive-apache + + + + io.airlift + aircompressor + + + + com.google.guava + guava + + + + it.unimi.dsi + fastutil + + + + org.xerial.snappy + snappy-java + runtime + + + + + io.airlift + log-manager + runtime + + + + + com.facebook.presto + presto-spi + + + + io.airlift + slice + + + + + com.facebook.presto + presto-main + test + + + + com.facebook.presto + presto-tests + test + + + + com.facebook.presto + presto-tpch + test + + + + io.airlift.tpch + tpch + test + + + + org.jetbrains + annotations + provided + + + + org.testng + testng + test + + + + io.airlift + testing + test + + + + org.assertj + assertj-core + test + + + + org.anarres.lzo + lzo-hadoop + test + + + + + com.facebook.presto + presto-benchmark + test + + + + org.openjdk.jmh + jmh-core + test + + + + org.openjdk.jmh + jmh-generator-annprocess + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + **/TestFullParquetReader.java + + + + + + + + + ci + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + + + + + + diff --git 
a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPage.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPage.java similarity index 80% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPage.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/DataPage.java index b8d8122a32eac..065684e6f91fb 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPage.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPage.java @@ -11,14 +11,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; -public abstract class ParquetDataPage - extends ParquetPage +public abstract class DataPage + extends Page { protected final int valueCount; - public ParquetDataPage(int compressedSize, int uncompressedSize, int valueCount) + public DataPage(int compressedSize, int uncompressedSize, int valueCount) { super(compressedSize, uncompressedSize); this.valueCount = valueCount; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV1.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV1.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV1.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV1.java index 5e7a41eebd10d..f278fe1f2ee17 100755 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV1.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV1.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import io.airlift.slice.Slice; import parquet.column.statistics.Statistics; @@ -19,8 +19,8 @@ import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; -public class ParquetDataPageV1 - extends ParquetDataPage +public class DataPageV1 + extends DataPage { private final Slice slice; private final Statistics statistics; @@ -28,7 +28,7 @@ public class ParquetDataPageV1 private final ParquetEncoding definitionLevelEncoding; private final ParquetEncoding valuesEncoding; - public ParquetDataPageV1( + public DataPageV1( Slice slice, int valueCount, int uncompressedSize, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV2.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV2.java similarity index 96% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV2.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV2.java index 39fb49db8cb6e..9034c0a66a004 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataPageV2.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/DataPageV2.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import io.airlift.slice.Slice; import parquet.column.statistics.Statistics; @@ -19,8 +19,8 @@ import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; -public class ParquetDataPageV2 - extends ParquetDataPage +public class DataPageV2 + extends DataPage { private final int rowCount; private final int nullCount; @@ -31,7 +31,7 @@ public class ParquetDataPageV2 private final Statistics statistics; private final boolean isCompressed; - public ParquetDataPageV2( + public DataPageV2( int rowCount, int nullCount, int valueCount, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDictionaryPage.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/DictionaryPage.java similarity index 79% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDictionaryPage.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/DictionaryPage.java index c1b91d5bc43bb..dc48a898105f8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDictionaryPage.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/DictionaryPage.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import io.airlift.slice.Slice; @@ -21,14 +21,14 @@ import static io.airlift.slice.Slices.wrappedBuffer; import static java.util.Objects.requireNonNull; -public class ParquetDictionaryPage - extends ParquetPage +public class DictionaryPage + extends Page { private final Slice slice; private final int dictionarySize; private final ParquetEncoding encoding; - public ParquetDictionaryPage(Slice slice, int dictionarySize, ParquetEncoding encoding) + public DictionaryPage(Slice slice, int dictionarySize, ParquetEncoding encoding) { this(requireNonNull(slice, "slice is null"), slice.length(), @@ -36,7 +36,7 @@ public ParquetDictionaryPage(Slice slice, int dictionarySize, ParquetEncoding en requireNonNull(encoding, "encoding is null")); } - public ParquetDictionaryPage(Slice slice, int uncompressedSize, int dictionarySize, ParquetEncoding encoding) + public DictionaryPage(Slice slice, int uncompressedSize, int dictionarySize, ParquetEncoding encoding) { super(requireNonNull(slice, "slice is null").length(), uncompressedSize); this.slice = slice; @@ -59,9 +59,9 @@ public ParquetEncoding getEncoding() return encoding; } - public ParquetDictionaryPage copy() + public DictionaryPage copy() { - return new ParquetDictionaryPage(wrappedBuffer(Arrays.copyOf(slice.getBytes(), slice.length())), getUncompressedSize(), dictionarySize, encoding); + return new DictionaryPage(wrappedBuffer(Arrays.copyOf(slice.getBytes(), slice.length())), getUncompressedSize(), dictionarySize, encoding); } @Override diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/Field.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/Field.java similarity index 97% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/Field.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/Field.java index 063e426239694..c3ceea2db14f7 100644 --- 
a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/Field.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/Field.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import com.facebook.presto.spi.type.Type; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/GroupField.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/GroupField.java similarity index 96% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/GroupField.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/GroupField.java index 934a1c2c43348..8618bb02940f7 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/GroupField.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/GroupField.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import com.facebook.presto.spi.type.Type; import com.google.common.collect.ImmutableList; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPage.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/Page.java similarity index 86% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPage.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/Page.java index 50fee0831ad6a..b5b917af2aa57 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetPage.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/Page.java @@ -11,14 +11,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; -public abstract class ParquetPage +public abstract class Page { protected final int compressedSize; protected final int uncompressedSize; - public ParquetPage(int compressedSize, int uncompressedSize) + public Page(int compressedSize, int uncompressedSize) { this.compressedSize = compressedSize; this.uncompressedSize = uncompressedSize; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCompressionUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCompressionUtils.java similarity index 99% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCompressionUtils.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCompressionUtils.java index ef9a300a4acdf..65579e6d1ddb8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCompressionUtils.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCompressionUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import io.airlift.compress.Decompressor; import io.airlift.compress.lzo.LzoDecompressor; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCorruptionException.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCorruptionException.java similarity index 96% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCorruptionException.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCorruptionException.java index 7065bbe6142e2..d99e72c5f6596 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetCorruptionException.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetCorruptionException.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import java.io.IOException; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataSource.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetDataSource.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataSource.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetDataSource.java index 75cbdab480b7c..43458a6661997 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetDataSource.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetDataSource.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import java.io.Closeable; import java.io.IOException; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetEncoding.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java similarity index 73% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetEncoding.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java index 77879cee2e769..fc4c93670c8d3 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetEncoding.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetEncoding.java @@ -11,15 +11,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; - -import com.facebook.presto.hive.parquet.dictionary.ParquetBinaryDictionary; -import com.facebook.presto.hive.parquet.dictionary.ParquetDictionary; -import com.facebook.presto.hive.parquet.dictionary.ParquetDictionaryReader; -import com.facebook.presto.hive.parquet.dictionary.ParquetDoubleDictionary; -import com.facebook.presto.hive.parquet.dictionary.ParquetFloatDictionary; -import com.facebook.presto.hive.parquet.dictionary.ParquetIntegerDictionary; -import com.facebook.presto.hive.parquet.dictionary.ParquetLongDictionary; +package com.facebook.presto.parquet; + +import com.facebook.presto.parquet.dictionary.BinaryDictionary; +import com.facebook.presto.parquet.dictionary.Dictionary; +import com.facebook.presto.parquet.dictionary.DictionaryReader; +import com.facebook.presto.parquet.dictionary.DoubleDictionary; +import com.facebook.presto.parquet.dictionary.FloatDictionary; +import com.facebook.presto.parquet.dictionary.IntegerDictionary; +import com.facebook.presto.parquet.dictionary.LongDictionary; import parquet.bytes.BytesUtils; import parquet.column.ColumnDescriptor; import 
parquet.column.values.ValuesReader; @@ -51,7 +51,7 @@ public enum ParquetEncoding { PLAIN { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { switch (descriptor.getType()) { case BOOLEAN: @@ -76,24 +76,24 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy } @Override - public ParquetDictionary initDictionary(ColumnDescriptor descriptor, ParquetDictionaryPage dictionaryPage) + public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException { switch (descriptor.getType()) { case BINARY: - return new ParquetBinaryDictionary(dictionaryPage); + return new BinaryDictionary(dictionaryPage); case FIXED_LEN_BYTE_ARRAY: - return new ParquetBinaryDictionary(dictionaryPage, descriptor.getTypeLength()); + return new BinaryDictionary(dictionaryPage, descriptor.getTypeLength()); case INT96: - return new ParquetBinaryDictionary(dictionaryPage, INT96_TYPE_LENGTH); + return new BinaryDictionary(dictionaryPage, INT96_TYPE_LENGTH); case INT64: - return new ParquetLongDictionary(dictionaryPage); + return new LongDictionary(dictionaryPage); case DOUBLE: - return new ParquetDoubleDictionary(dictionaryPage); + return new DoubleDictionary(dictionaryPage); case INT32: - return new ParquetIntegerDictionary(dictionaryPage); + return new IntegerDictionary(dictionaryPage); case FLOAT: - return new ParquetFloatDictionary(dictionaryPage); + return new FloatDictionary(dictionaryPage); default: throw new ParquetDecodingException("Dictionary encoding does not support: " + descriptor.getType()); } @@ -102,7 +102,7 @@ public ParquetDictionary initDictionary(ColumnDescriptor descriptor, ParquetDict RLE { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, 
ValuesType valuesType) { int bitWidth = BytesUtils.getWidthFromMaxInt(getMaxLevel(descriptor, valuesType)); if (bitWidth == 0) { @@ -114,7 +114,7 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy BIT_PACKED { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { return new ByteBitPackingValuesReader(getMaxLevel(descriptor, valuesType), BIG_ENDIAN); } @@ -122,13 +122,13 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy PLAIN_DICTIONARY { @Override - public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType, ParquetDictionary dictionary) + public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) { return RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary); } @Override - public ParquetDictionary initDictionary(ColumnDescriptor descriptor, ParquetDictionaryPage dictionaryPage) + public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException { return PLAIN.initDictionary(descriptor, dictionaryPage); @@ -143,7 +143,7 @@ public boolean usesDictionary() DELTA_BINARY_PACKED { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { checkArgument(descriptor.getType() == INT32, "Encoding DELTA_BINARY_PACKED is only supported for type INT32"); return new DeltaBinaryPackingValuesReader(); @@ -152,7 +152,7 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy DELTA_LENGTH_BYTE_ARRAY { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public 
ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { checkArgument(descriptor.getType() == BINARY, "Encoding DELTA_LENGTH_BYTE_ARRAY is only supported for type BINARY"); return new DeltaLengthByteArrayValuesReader(); @@ -161,7 +161,7 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy DELTA_BYTE_ARRAY { @Override - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { checkArgument( descriptor.getType() == BINARY || descriptor.getType() == FIXED_LEN_BYTE_ARRAY, @@ -172,13 +172,13 @@ public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesTy RLE_DICTIONARY { @Override - public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType, ParquetDictionary dictionary) + public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) { - return new ParquetDictionaryReader(dictionary); + return new DictionaryReader(dictionary); } @Override - public ParquetDictionary initDictionary(ColumnDescriptor descriptor, ParquetDictionaryPage dictionaryPage) + public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException { return PLAIN.initDictionary(descriptor, dictionaryPage); @@ -193,7 +193,7 @@ public boolean usesDictionary() static final int INT96_TYPE_LENGTH = 12; - static int getMaxLevel(ColumnDescriptor descriptor, ParquetValuesType valuesType) + static int getMaxLevel(ColumnDescriptor descriptor, ValuesType valuesType) { switch (valuesType) { case REPETITION_LEVEL: @@ -205,7 +205,7 @@ static int getMaxLevel(ColumnDescriptor descriptor, ParquetValuesType valuesType return 1; } default: - throw new ParquetDecodingException("Unsupported Parquet values type: " + valuesType); + throw new 
ParquetDecodingException("Unsupported values type: " + valuesType); } } @@ -214,19 +214,19 @@ public boolean usesDictionary() return false; } - public ParquetDictionary initDictionary(ColumnDescriptor descriptor, ParquetDictionaryPage dictionaryPage) + public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException { - throw new UnsupportedOperationException("Parquet Dictionary encoding is not supported for: " + name()); + throw new UnsupportedOperationException("Dictionary encoding is not supported for: " + name()); } - public ValuesReader getValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType) + public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) { - throw new UnsupportedOperationException("Error decoding Parquet values in encoding: " + this.name()); + throw new UnsupportedOperationException("Error decoding values in encoding: " + this.name()); } - public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ParquetValuesType valuesType, ParquetDictionary dictionary) + public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) { - throw new UnsupportedOperationException("Parquet Dictionary encoding is not supported for: " + name()); + throw new UnsupportedOperationException("Dictionary encoding is not supported for: " + name()); } } diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTimestampUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTimestampUtils.java new file mode 100644 index 0000000000000..7a1e856633a6e --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTimestampUtils.java @@ -0,0 +1,61 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.parquet; + +import com.facebook.presto.spi.PrestoException; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import parquet.io.api.Binary; + +import java.util.concurrent.TimeUnit; + +import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; + +/** + * Utility class for decoding INT96 encoded parquet timestamp to timestamp millis in GMT. + *

+ */ +public final class ParquetTimestampUtils +{ + private static final int JULIAN_EPOCH_OFFSET_DAYS = 2_440_588; + private static final long MILLIS_IN_DAY = TimeUnit.DAYS.toMillis(1); + private static final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1); + + private ParquetTimestampUtils() {} + + /** + * Returns GMT timestamp from binary encoded parquet timestamp (12 bytes - julian date + time of day nanos). + * + * @param timestampBinary INT96 parquet timestamp + * @return timestamp in millis, GMT timezone + */ + public static long getTimestampMillis(Binary timestampBinary) + { + if (timestampBinary.length() != 12) { + throw new PrestoException(NOT_SUPPORTED, "Parquet timestamp must be 12 bytes, actual " + timestampBinary.length()); + } + byte[] bytes = timestampBinary.getBytes(); + + // little endian encoding - need to invert byte order + long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); + int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]); + + return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND); + } + + private static long julianDayToMillis(int julianDay) + { + return (julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY; + } +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTypeUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTypeUtils.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java index 88c8bb895dc27..a2a2d0f961345 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetTypeUtils.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetTypeUtils.java @@ -11,9 +11,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; -import com.facebook.presto.hive.HiveColumnHandle; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.TupleDomain; @@ -214,18 +213,6 @@ public static int getFieldIndex(MessageType fileSchema, String name) } } - public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) - { - if (useParquetColumnNames) { - return getParquetTypeByName(column.getName(), messageType); - } - - if (column.getHiveColumnIndex() < messageType.getFieldCount()) { - return messageType.getType(column.getHiveColumnIndex()); - } - return null; - } - public static ParquetEncoding getParquetEncoding(Encoding encoding) { switch (encoding) { @@ -250,7 +237,7 @@ public static ParquetEncoding getParquetEncoding(Encoding encoding) } } - private static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) + public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); @@ -306,4 +293,21 @@ public static boolean isValueNull(boolean required, int definitionLevel, int max { return !required && (definitionLevel == maxDefinitionLevel - 1); } + + // copied from presto-hive DecimalUtils + public static long getShortDecimalValue(byte[] bytes) + { + long value = 0; + if ((bytes[0] & 0x80) != 0) { + for (int i = 0; i < 8 - bytes.length; ++i) { + value |= 0xFFL << (8 * (7 - i)); + } + } + + for (int i = 0; i < bytes.length; i++) { + value |= ((long) bytes[bytes.length - i - 1] & 0xFFL) << (8 * i); + } + + return value; + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetValidationUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetValidationUtils.java similarity index 
95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetValidationUtils.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetValidationUtils.java index aae8432aa9a74..1e4ca48c9c764 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetValidationUtils.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ParquetValidationUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import static java.lang.String.format; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/PrimitiveField.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/PrimitiveField.java similarity index 96% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/PrimitiveField.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/PrimitiveField.java index 4eca7ef9af7ed..71915444bba0a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/PrimitiveField.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/PrimitiveField.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import com.facebook.presto.spi.type.Type; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/RichColumnDescriptor.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/RichColumnDescriptor.java similarity index 97% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/RichColumnDescriptor.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/RichColumnDescriptor.java index 4f69e3a46dc2d..364018e3a489a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/RichColumnDescriptor.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/RichColumnDescriptor.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import parquet.column.ColumnDescriptor; import parquet.schema.PrimitiveType; diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/ValuesType.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/ValuesType.java new file mode 100644 index 0000000000000..66558498dfa3f --- /dev/null +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/ValuesType.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.parquet; + +public enum ValuesType +{ + REPETITION_LEVEL, + DEFINITION_LEVEL, + VALUES +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetBinaryDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java similarity index 85% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetBinaryDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java index 529e95c218367..98de7778662fd 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetBinaryDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/BinaryDictionary.java @@ -11,9 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.io.api.Binary; import java.io.IOException; @@ -22,18 +22,18 @@ import static com.google.common.base.Preconditions.checkArgument; import static parquet.bytes.BytesUtils.readIntLittleEndian; -public class ParquetBinaryDictionary - extends ParquetDictionary +public class BinaryDictionary + extends Dictionary { private final Binary[] content; - public ParquetBinaryDictionary(ParquetDictionaryPage dictionaryPage) + public BinaryDictionary(DictionaryPage dictionaryPage) throws IOException { this(dictionaryPage, null); } - public ParquetBinaryDictionary(ParquetDictionaryPage dictionaryPage, Integer length) + public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException { super(dictionaryPage.getEncoding()); diff --git 
a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/Dictionary.java similarity index 83% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/Dictionary.java index 328992de7e80b..389ae428f03b9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/Dictionary.java @@ -11,22 +11,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetEncoding; +import com.facebook.presto.parquet.ParquetEncoding; import parquet.io.api.Binary; import static com.google.common.base.Preconditions.checkArgument; -public abstract class ParquetDictionary +public abstract class Dictionary { private final ParquetEncoding encoding; - public ParquetDictionary(ParquetEncoding encoding) + public Dictionary(ParquetEncoding encoding) { checkArgument( encoding == ParquetEncoding.PLAIN_DICTIONARY || encoding == ParquetEncoding.PLAIN, - "Parquet dictionary does not support encoding: %s", encoding); + "Dictionary does not support encoding: %s", encoding); this.encoding = encoding; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionaryReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DictionaryReader.java similarity index 91% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionaryReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DictionaryReader.java index
fca427e3e221c..714c1213e9cc1 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDictionaryReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DictionaryReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; import parquet.bytes.BytesUtils; import parquet.column.values.ValuesReader; @@ -24,13 +24,13 @@ import static com.google.common.base.Preconditions.checkArgument; -public class ParquetDictionaryReader +public class DictionaryReader extends ValuesReader { - private final ParquetDictionary dictionary; + private final Dictionary dictionary; private RunLengthBitPackingHybridDecoder decoder; - public ParquetDictionaryReader(ParquetDictionary dictionary) + public DictionaryReader(Dictionary dictionary) { this.dictionary = dictionary; } @@ -39,7 +39,7 @@ public ParquetDictionaryReader(ParquetDictionary dictionary) public void initFromPage(int valueCount, byte[] page, int offset) throws IOException { - checkArgument(page.length > offset, "Attempt to read offset not in the Parquet page"); + checkArgument(page.length > offset, "Attempt to read offset not in the page"); ByteArrayInputStream in = new ByteArrayInputStream(page, offset, page.length - offset); int bitWidth = BytesUtils.readIntLittleEndianOnOneByte(in); decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDoubleDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DoubleDictionary.java similarity index 85% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDoubleDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DoubleDictionary.java index 
825b043a708d2..9ba33b629e480 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetDoubleDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/DoubleDictionary.java @@ -11,21 +11,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.column.values.plain.PlainValuesReader.DoublePlainValuesReader; import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; -public class ParquetDoubleDictionary - extends ParquetDictionary +public class DoubleDictionary + extends Dictionary { private final double[] content; - public ParquetDoubleDictionary(ParquetDictionaryPage dictionaryPage) + public DoubleDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage.getEncoding()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetFloatDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/FloatDictionary.java similarity index 85% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetFloatDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/FloatDictionary.java index 2db9b64b81eb3..2ae885f53f6c3 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetFloatDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/FloatDictionary.java @@ -11,21 +11,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.column.values.plain.PlainValuesReader.FloatPlainValuesReader; import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; -public class ParquetFloatDictionary - extends ParquetDictionary +public class FloatDictionary + extends Dictionary { private final float[] content; - public ParquetFloatDictionary(ParquetDictionaryPage dictionaryPage) + public FloatDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage.getEncoding()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetIntegerDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/IntegerDictionary.java similarity index 84% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetIntegerDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/IntegerDictionary.java index c4660797876be..0d8fb9974fa5f 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetIntegerDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/IntegerDictionary.java @@ -11,21 +11,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.column.values.plain.PlainValuesReader.IntegerPlainValuesReader; import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; -public class ParquetIntegerDictionary - extends ParquetDictionary +public class IntegerDictionary + extends Dictionary { private final int[] content; - public ParquetIntegerDictionary(ParquetDictionaryPage dictionaryPage) + public IntegerDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage.getEncoding()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetLongDictionary.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/LongDictionary.java similarity index 85% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetLongDictionary.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/LongDictionary.java index adf2aeb60296d..a1b227d7f1134 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/dictionary/ParquetLongDictionary.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/dictionary/LongDictionary.java @@ -11,21 +11,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.dictionary; +package com.facebook.presto.parquet.dictionary; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.column.values.plain.PlainValuesReader.LongPlainValuesReader; import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; -public class ParquetLongDictionary - extends ParquetDictionary +public class LongDictionary + extends Dictionary { private final long[] content; - public ParquetLongDictionary(ParquetDictionaryPage dictionaryPage) + public LongDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage.getEncoding()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDictionaryDescriptor.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/DictionaryDescriptor.java similarity index 69% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDictionaryDescriptor.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/DictionaryDescriptor.java index e2ba85cbe29e0..f789e391f809a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDictionaryDescriptor.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/DictionaryDescriptor.java @@ -11,19 +11,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DictionaryPage; import parquet.column.ColumnDescriptor; import java.util.Optional; -public class ParquetDictionaryDescriptor +public class DictionaryDescriptor { private final ColumnDescriptor columnDescriptor; - private final Optional dictionaryPage; + private final Optional dictionaryPage; - public ParquetDictionaryDescriptor(ColumnDescriptor columnDescriptor, Optional dictionaryPage) + public DictionaryDescriptor(ColumnDescriptor columnDescriptor, Optional dictionaryPage) { this.columnDescriptor = columnDescriptor; this.dictionaryPage = dictionaryPage; @@ -34,7 +34,7 @@ public ColumnDescriptor getColumnDescriptor() return columnDescriptor; } - public Optional getDictionaryPage() + public Optional getDictionaryPage() { return dictionaryPage; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDoubleStatistics.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetDoubleStatistics.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDoubleStatistics.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetDoubleStatistics.java index 8c70e0f93368f..1879663736f2a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetDoubleStatistics.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetDoubleStatistics.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; public class ParquetDoubleStatistics implements ParquetRangeStatistics diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetIntegerStatistics.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetIntegerStatistics.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetIntegerStatistics.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetIntegerStatistics.java index b5f8223a06269..46f0abde71bd2 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetIntegerStatistics.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetIntegerStatistics.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; public class ParquetIntegerStatistics implements ParquetRangeStatistics diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetRangeStatistics.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetRangeStatistics.java similarity index 92% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetRangeStatistics.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetRangeStatistics.java index 6d0ff77f0c278..43f54e55b53f7 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetRangeStatistics.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetRangeStatistics.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; public interface ParquetRangeStatistics { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetStringStatistics.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetStringStatistics.java similarity index 95% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetStringStatistics.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetStringStatistics.java index 7722e66fdeb09..89fc21d8e25db 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetStringStatistics.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/ParquetStringStatistics.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; import io.airlift.slice.Slice; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicate.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicate.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java index f0b97b668c93b..611f71a587356 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicate.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; import parquet.column.ColumnDescriptor; import parquet.column.statistics.Statistics; import java.util.Map; -public interface ParquetPredicate +public interface Predicate { - ParquetPredicate TRUE = new ParquetPredicate() + Predicate TRUE = new Predicate() { @Override public boolean matches(long numberOfRows, Map> statistics) @@ -29,7 +29,7 @@ public boolean matches(long numberOfRows, Map> s } @Override - public boolean matches(Map dictionaries) + public boolean matches(Map dictionaries) { return true; } @@ -49,5 +49,5 @@ public boolean matches(Map dictio * * @param dictionaries dictionaries per column */ - boolean matches(Map dictionaries); + boolean matches(Map dictionaries); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicateUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java similarity index 66% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicateUtils.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java index 1de48312d1580..6816d15ac19d1 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/ParquetPredicateUtils.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java @@ -11,14 +11,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; - -import com.facebook.presto.hive.HiveColumnHandle; -import com.facebook.presto.hive.parquet.ParquetDataSource; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; -import com.facebook.presto.hive.parquet.ParquetEncoding; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; -import com.facebook.presto.spi.predicate.Domain; +package com.facebook.presto.parquet.predicate; + +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.ParquetDataSource; +import com.facebook.presto.parquet.ParquetEncoding; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.Type; import com.google.common.annotations.VisibleForTesting; @@ -47,23 +45,21 @@ import java.util.Optional; import java.util.Set; -import static com.facebook.presto.hive.parquet.ParquetCompressionUtils.decompress; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetEncoding; +import static com.facebook.presto.parquet.ParquetCompressionUtils.decompress; +import static com.facebook.presto.parquet.ParquetTypeUtils.getParquetEncoding; import static com.facebook.presto.spi.type.IntegerType.INTEGER; import static com.facebook.presto.spi.type.SmallintType.SMALLINT; import static com.facebook.presto.spi.type.TinyintType.TINYINT; import static com.google.common.base.Verify.verify; import static io.airlift.slice.Slices.wrappedBuffer; import static java.lang.Math.toIntExact; -import static java.util.Map.Entry; -import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE; import static parquet.column.Encoding.BIT_PACKED; import static parquet.column.Encoding.PLAIN_DICTIONARY; import static parquet.column.Encoding.RLE; -public final class ParquetPredicateUtils +public final class PredicateUtils { - private ParquetPredicateUtils() + private PredicateUtils() { } @@ -76,29 
+72,7 @@ public static boolean isStatisticsOverflow(Type type, ParquetIntegerStatistics p (type.equals(INTEGER) && (min < Integer.MIN_VALUE || max > Integer.MAX_VALUE)); } - public static TupleDomain getParquetTupleDomain(Map, RichColumnDescriptor> descriptorsByPath, TupleDomain effectivePredicate) - { - if (effectivePredicate.isNone()) { - return TupleDomain.none(); - } - - ImmutableMap.Builder predicate = ImmutableMap.builder(); - for (Entry entry : effectivePredicate.getDomains().get().entrySet()) { - HiveColumnHandle columnHandle = entry.getKey(); - // skip looking up predicates for complex types as Parquet only stores stats for primitives - if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) { - continue; - } - - RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); - if (descriptor != null) { - predicate.put(descriptor, entry.getValue()); - } - } - return TupleDomain.withColumnDomains(predicate.build()); - } - - public static ParquetPredicate buildParquetPredicate(MessageType requestedSchema, TupleDomain parquetTupleDomain, Map, RichColumnDescriptor> descriptorsByPath) + public static Predicate buildPredicate(MessageType requestedSchema, TupleDomain parquetTupleDomain, Map, RichColumnDescriptor> descriptorsByPath) { ImmutableList.Builder columnReferences = ImmutableList.builder(); for (String[] paths : requestedSchema.getPaths()) { @@ -110,14 +84,14 @@ public static ParquetPredicate buildParquetPredicate(MessageType requestedSchema return new TupleDomainParquetPredicate(parquetTupleDomain, columnReferences.build()); } - public static boolean predicateMatches(ParquetPredicate parquetPredicate, BlockMetaData block, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain parquetTupleDomain) + public static boolean predicateMatches(Predicate parquetPredicate, BlockMetaData block, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain 
parquetTupleDomain) { Map> columnStatistics = getStatistics(block, descriptorsByPath); if (!parquetPredicate.matches(block.getRowCount(), columnStatistics)) { return false; } - Map dictionaries = getDictionaries(block, dataSource, descriptorsByPath, parquetTupleDomain); + Map dictionaries = getDictionaries(block, dataSource, descriptorsByPath, parquetTupleDomain); return parquetPredicate.matches(dictionaries); } @@ -136,9 +110,9 @@ private static Map> getStatistics(BlockMetaData return statistics.build(); } - private static Map getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain parquetTupleDomain) + private static Map getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain parquetTupleDomain) { - ImmutableMap.Builder dictionaries = ImmutableMap.builder(); + ImmutableMap.Builder dictionaries = ImmutableMap.builder(); for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) { RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray())); if (descriptor != null) { @@ -146,8 +120,8 @@ private static Map getDictionarie int totalSize = toIntExact(columnMetaData.getTotalSize()); byte[] buffer = new byte[totalSize]; dataSource.readFully(columnMetaData.getStartingPos(), buffer); - Optional dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec()); - dictionaries.put(descriptor, new ParquetDictionaryDescriptor(descriptor, dictionaryPage)); + Optional dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec()); + dictionaries.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage)); break; } } @@ -155,7 +129,7 @@ private static Map getDictionarie return dictionaries.build(); } - private static Optional readDictionaryPage(byte[] data, CompressionCodecName codecName) + private static Optional readDictionaryPage(byte[] data, 
CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); @@ -170,7 +144,7 @@ private static Optional readDictionaryPage(byte[] data, C ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name())); int dictionarySize = dicHeader.getNum_values(); - return Optional.of(new ParquetDictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); + return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); } catch (IOException ignored) { return Optional.empty(); @@ -185,7 +159,7 @@ private static boolean isColumnPredicate(ColumnDescriptor columnDescriptor, Tupl @VisibleForTesting @SuppressWarnings("deprecation") - static boolean isOnlyDictionaryEncodingPages(Set encodings) + public static boolean isOnlyDictionaryEncodingPages(Set encodings) { // TODO: update to use EncodingStats in ColumnChunkMetaData when available if (encodings.contains(PLAIN_DICTIONARY)) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/TupleDomainParquetPredicate.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java similarity index 93% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/TupleDomainParquetPredicate.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java index 598a7d7a83a16..50c7cc62b3f28 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/predicate/TupleDomainParquetPredicate.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java @@ -11,11 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.predicate; +package com.facebook.presto.parquet.predicate; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; -import com.facebook.presto.hive.parquet.dictionary.ParquetDictionary; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.dictionary.Dictionary; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.Range; import com.facebook.presto.spi.predicate.TupleDomain; @@ -42,8 +42,8 @@ import java.util.Optional; import java.util.function.Function; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getPrestoType; -import static com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.isStatisticsOverflow; +import static com.facebook.presto.parquet.ParquetTypeUtils.getPrestoType; +import static com.facebook.presto.parquet.predicate.PredicateUtils.isStatisticsOverflow; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; import static com.facebook.presto.spi.type.DoubleType.DOUBLE; @@ -56,7 +56,7 @@ import static java.util.Objects.requireNonNull; public class TupleDomainParquetPredicate - implements ParquetPredicate + implements Predicate { private final TupleDomain effectivePredicate; private final List columns; @@ -95,12 +95,12 @@ public boolean matches(long numberOfRows, Map> s } @Override - public boolean matches(Map dictionaries) + public boolean matches(Map dictionaries) { ImmutableMap.Builder domains = ImmutableMap.builder(); for (RichColumnDescriptor column : columns) { - ParquetDictionaryDescriptor dictionaryDescriptor = dictionaries.get(column); + DictionaryDescriptor dictionaryDescriptor = dictionaries.get(column); Domain domain = getDomain(getPrestoType(effectivePredicate, column), dictionaryDescriptor); if (domain 
!= null) { domains.put(column, domain); @@ -204,19 +204,19 @@ else if (isVarcharType(type) && statistics instanceof BinaryStatistics) { } @VisibleForTesting - public static Domain getDomain(Type type, ParquetDictionaryDescriptor dictionaryDescriptor) + public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescriptor) { if (dictionaryDescriptor == null) { return null; } ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor(); - Optional dictionaryPage = dictionaryDescriptor.getDictionaryPage(); + Optional dictionaryPage = dictionaryDescriptor.getDictionaryPage(); if (!dictionaryPage.isPresent()) { return null; } - ParquetDictionary dictionary; + Dictionary dictionary; try { dictionary = dictionaryPage.get().getEncoding().initDictionary(columnDescriptor, dictionaryPage.get()); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBinaryColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BinaryColumnReader.java similarity index 88% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBinaryColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BinaryColumnReader.java index d63b4b60ed9dd..2722e028604ff 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBinaryColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BinaryColumnReader.java @@ -11,9 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; import io.airlift.slice.Slice; @@ -26,10 +26,10 @@ import static io.airlift.slice.Slices.EMPTY_SLICE; import static io.airlift.slice.Slices.wrappedBuffer; -public class ParquetBinaryColumnReader - extends ParquetPrimitiveColumnReader +public class BinaryColumnReader + extends PrimitiveColumnReader { - public ParquetBinaryColumnReader(RichColumnDescriptor descriptor) + public BinaryColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBooleanColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BooleanColumnReader.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBooleanColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BooleanColumnReader.java index 3d1f0225f4dc4..24bc03236a299 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetBooleanColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/BooleanColumnReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; -public class ParquetBooleanColumnReader - extends ParquetPrimitiveColumnReader +public class BooleanColumnReader + extends PrimitiveColumnReader { - public ParquetBooleanColumnReader(RichColumnDescriptor descriptor) + public BooleanColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ColumnChunk.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunk.java similarity index 96% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ColumnChunk.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunk.java index 1cfcabe844418..f4a1a8dcf3dcc 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ColumnChunk.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunk.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; import com.facebook.presto.spi.block.Block; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunkDescriptor.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunkDescriptor.java similarity index 90% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunkDescriptor.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunkDescriptor.java index 65a148a29241e..388ca74b0f041 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunkDescriptor.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ColumnChunkDescriptor.java @@ -11,18 +11,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; import parquet.column.ColumnDescriptor; import parquet.hadoop.metadata.ColumnChunkMetaData; -public class ParquetColumnChunkDescriptor +public class ColumnChunkDescriptor { private final ColumnDescriptor columnDescriptor; private final ColumnChunkMetaData columnChunkMetaData; private final int size; - public ParquetColumnChunkDescriptor( + public ColumnChunkDescriptor( ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData, int size) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDecimalColumnReaderFactory.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DecimalColumnReaderFactory.java similarity index 62% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDecimalColumnReaderFactory.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DecimalColumnReaderFactory.java index 
4903f3811fc6b..4616f0563bc56 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDecimalColumnReaderFactory.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DecimalColumnReaderFactory.java @@ -11,23 +11,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.type.DecimalType; -public final class ParquetDecimalColumnReaderFactory +public final class DecimalColumnReaderFactory { - private ParquetDecimalColumnReaderFactory() {} + private DecimalColumnReaderFactory() {} - public static ParquetPrimitiveColumnReader createReader(RichColumnDescriptor descriptor, int precision, int scale) + public static PrimitiveColumnReader createReader(RichColumnDescriptor descriptor, int precision, int scale) { DecimalType decimalType = DecimalType.createDecimalType(precision, scale); if (decimalType.isShort()) { - return new ParquetShortDecimalColumnReader(descriptor); + return new ShortDecimalColumnReader(descriptor); } else { - return new ParquetLongDecimalColumnReader(descriptor); + return new LongDecimalColumnReader(descriptor); } } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDoubleColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DoubleColumnReader.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDoubleColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DoubleColumnReader.java index 416ae35f10dc9..5adb4ee8fbd3e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetDoubleColumnReader.java +++ 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/DoubleColumnReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; -public class ParquetDoubleColumnReader - extends ParquetPrimitiveColumnReader +public class DoubleColumnReader + extends PrimitiveColumnReader { - public ParquetDoubleColumnReader(RichColumnDescriptor descriptor) + public DoubleColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetFloatColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/FloatColumnReader.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetFloatColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/FloatColumnReader.java index 559b3e6d2f10f..d8e8253d65cf4 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetFloatColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/FloatColumnReader.java @@ -11,18 +11,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; import static java.lang.Float.floatToRawIntBits; -public class ParquetFloatColumnReader - extends ParquetPrimitiveColumnReader +public class FloatColumnReader + extends PrimitiveColumnReader { - public ParquetFloatColumnReader(RichColumnDescriptor descriptor) + public FloatColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetIntColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/IntColumnReader.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetIntColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/IntColumnReader.java index 8564ef56cfdee..33e5d3d477388 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetIntColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/IntColumnReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; -public class ParquetIntColumnReader - extends ParquetPrimitiveColumnReader +public class IntColumnReader + extends PrimitiveColumnReader { - public ParquetIntColumnReader(RichColumnDescriptor descriptor) + public IntColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelNullReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelNullReader.java similarity index 83% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelNullReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelNullReader.java index 444f6b31e051e..04004504d39f8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelNullReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelNullReader.java @@ -11,10 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -public class ParquetLevelNullReader - implements ParquetLevelReader +public class LevelNullReader + implements LevelReader { @Override public int readLevel() diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelRLEReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelRLEReader.java similarity index 83% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelRLEReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelRLEReader.java index 60d04f7e7766f..f64ef584d9808 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelRLEReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelRLEReader.java @@ -11,19 +11,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; import parquet.column.values.rle.RunLengthBitPackingHybridDecoder; import parquet.io.ParquetDecodingException; import java.io.IOException; -public class ParquetLevelRLEReader - implements ParquetLevelReader +public class LevelRLEReader + implements LevelReader { private final RunLengthBitPackingHybridDecoder delegate; - public ParquetLevelRLEReader(RunLengthBitPackingHybridDecoder delegate) + public LevelRLEReader(RunLengthBitPackingHybridDecoder delegate) { this.delegate = delegate; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelReader.java similarity index 87% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelReader.java index 2bfe049128ace..4028afdd99c0f 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelReader.java @@ -11,9 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -public interface ParquetLevelReader +public interface LevelReader { int readLevel(); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelValuesReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelValuesReader.java similarity index 81% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelValuesReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelValuesReader.java index 6e36cc9e3ee57..622ccdb4501bd 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLevelValuesReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LevelValuesReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; import parquet.column.values.ValuesReader; -public class ParquetLevelValuesReader - implements ParquetLevelReader +public class LevelValuesReader + implements LevelReader { private final ValuesReader delegate; - public ParquetLevelValuesReader(ValuesReader delegate) + public LevelValuesReader(ValuesReader delegate) { this.delegate = delegate; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetListColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ListColumnReader.java similarity index 94% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetListColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ListColumnReader.java index d24fc55d86c94..71773540cd998 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetListColumnReader.java +++ 
b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ListColumnReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.Field; -import com.facebook.presto.hive.parquet.ParquetTypeUtils; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.ParquetTypeUtils; import it.unimi.dsi.fastutil.booleans.BooleanList; import it.unimi.dsi.fastutil.ints.IntList; -public class ParquetListColumnReader +public class ListColumnReader { - private ParquetListColumnReader() + private ListColumnReader() { } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongColumnReader.java similarity index 82% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongColumnReader.java index 7c1da0904c502..a4284b5fb8964 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongColumnReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; -public class ParquetLongColumnReader - extends ParquetPrimitiveColumnReader +public class LongColumnReader + extends PrimitiveColumnReader { - public ParquetLongColumnReader(RichColumnDescriptor descriptor) + public LongColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongDecimalColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongDecimalColumnReader.java similarity index 84% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongDecimalColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongDecimalColumnReader.java index b2a3ad711283b..f090080c2214c 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetLongDecimalColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/LongDecimalColumnReader.java @@ -11,9 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Decimals; import com.facebook.presto.spi.type.Type; @@ -21,10 +21,10 @@ import java.math.BigInteger; -public class ParquetLongDecimalColumnReader - extends ParquetPrimitiveColumnReader +public class LongDecimalColumnReader + extends PrimitiveColumnReader { - ParquetLongDecimalColumnReader(RichColumnDescriptor descriptor) + LongDecimalColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetMetadataReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/MetadataReader.java similarity index 98% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetMetadataReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/MetadataReader.java index f45ef6373cb0c..bfe31878c9f2f 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetMetadataReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/MetadataReader.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -51,16 +51,16 @@ import java.util.Map; import java.util.Set; -import static com.facebook.presto.hive.parquet.ParquetValidationUtils.validateParquet; +import static com.facebook.presto.parquet.ParquetValidationUtils.validateParquet; import static java.nio.charset.StandardCharsets.US_ASCII; import static parquet.format.Util.readFileMetaData; -public final class ParquetMetadataReader +public final class MetadataReader { private static final int PARQUET_METADATA_LENGTH = 4; private static final byte[] MAGIC = "PAR1".getBytes(US_ASCII); - private ParquetMetadataReader() {} + private MetadataReader() {} public static ParquetMetadata readFooter(FileSystem fileSystem, Path file, long fileSize) throws IOException diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPageReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PageReader.java similarity index 72% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPageReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PageReader.java index 4302c3be22fea..6e036e2be9b9e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPageReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PageReader.java @@ -11,37 +11,37 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.ParquetDataPage; -import com.facebook.presto.hive.parquet.ParquetDataPageV1; -import com.facebook.presto.hive.parquet.ParquetDataPageV2; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DataPageV1; +import com.facebook.presto.parquet.DataPageV2; +import com.facebook.presto.parquet.DictionaryPage; import parquet.hadoop.metadata.CompressionCodecName; import java.io.IOException; import java.util.LinkedList; import java.util.List; -import static com.facebook.presto.hive.parquet.ParquetCompressionUtils.decompress; +import static com.facebook.presto.parquet.ParquetCompressionUtils.decompress; import static java.lang.Math.toIntExact; -class ParquetPageReader +class PageReader { private final CompressionCodecName codec; private final long valueCount; - private final List compressedPages; - private final ParquetDictionaryPage compressedDictionaryPage; + private final List compressedPages; + private final DictionaryPage compressedDictionaryPage; - public ParquetPageReader(CompressionCodecName codec, - List compressedPages, - ParquetDictionaryPage compressedDictionaryPage) + public PageReader(CompressionCodecName codec, + List compressedPages, + DictionaryPage compressedDictionaryPage) { this.codec = codec; this.compressedPages = new LinkedList<>(compressedPages); this.compressedDictionaryPage = compressedDictionaryPage; int count = 0; - for (ParquetDataPage page : compressedPages) { + for (DataPage page : compressedPages) { count += page.getValueCount(); } this.valueCount = count; @@ -52,16 +52,16 @@ public long getTotalValueCount() return valueCount; } - public ParquetDataPage readPage() + public DataPage readPage() { if (compressedPages.isEmpty()) { return null; } - ParquetDataPage compressedPage = compressedPages.remove(0); + DataPage 
compressedPage = compressedPages.remove(0); try { - if (compressedPage instanceof ParquetDataPageV1) { - ParquetDataPageV1 dataPageV1 = (ParquetDataPageV1) compressedPage; - return new ParquetDataPageV1( + if (compressedPage instanceof DataPageV1) { + DataPageV1 dataPageV1 = (DataPageV1) compressedPage; + return new DataPageV1( decompress(codec, dataPageV1.getSlice(), dataPageV1.getUncompressedSize()), dataPageV1.getValueCount(), dataPageV1.getUncompressedSize(), @@ -71,14 +71,14 @@ public ParquetDataPage readPage() dataPageV1.getValueEncoding()); } else { - ParquetDataPageV2 dataPageV2 = (ParquetDataPageV2) compressedPage; + DataPageV2 dataPageV2 = (DataPageV2) compressedPage; if (!dataPageV2.isCompressed()) { return dataPageV2; } int uncompressedSize = toIntExact(dataPageV2.getUncompressedSize() - dataPageV2.getDefinitionLevels().length() - dataPageV2.getRepetitionLevels().length()); - return new ParquetDataPageV2( + return new DataPageV2( dataPageV2.getRowCount(), dataPageV2.getNullCount(), dataPageV2.getValueCount(), @@ -96,13 +96,13 @@ public ParquetDataPage readPage() } } - public ParquetDictionaryPage readDictionaryPage() + public DictionaryPage readDictionaryPage() { if (compressedDictionaryPage == null) { return null; } try { - return new ParquetDictionaryPage( + return new DictionaryPage( decompress(codec, compressedDictionaryPage.getSlice(), compressedDictionaryPage.getUncompressedSize()), compressedDictionaryPage.getDictionarySize(), compressedDictionaryPage.getEncoding()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunk.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetColumnChunk.java similarity index 79% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunk.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetColumnChunk.java index 188ae23d5259f..c54c1628b0462 100644 --- 
a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetColumnChunk.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetColumnChunk.java @@ -11,13 +11,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.ParquetCorruptionException; -import com.facebook.presto.hive.parquet.ParquetDataPage; -import com.facebook.presto.hive.parquet.ParquetDataPageV1; -import com.facebook.presto.hive.parquet.ParquetDataPageV2; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DataPageV1; +import com.facebook.presto.parquet.DataPageV2; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.ParquetCorruptionException; import io.airlift.slice.Slice; import parquet.column.Encoding; import parquet.format.DataPageHeader; @@ -31,16 +31,16 @@ import java.util.ArrayList; import java.util.List; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetEncoding; +import static com.facebook.presto.parquet.ParquetTypeUtils.getParquetEncoding; import static io.airlift.slice.Slices.wrappedBuffer; public class ParquetColumnChunk extends ByteArrayInputStream { - private final ParquetColumnChunkDescriptor descriptor; + private final ColumnChunkDescriptor descriptor; public ParquetColumnChunk( - ParquetColumnChunkDescriptor descriptor, + ColumnChunkDescriptor descriptor, byte[] data, int offset) { @@ -49,7 +49,7 @@ public ParquetColumnChunk( this.pos = offset; } - public ParquetColumnChunkDescriptor getDescriptor() + public ColumnChunkDescriptor getDescriptor() { return descriptor; } @@ -60,11 +60,11 @@ protected PageHeader readPageHeader() return Util.readPageHeader(this); } - public ParquetPageReader readAllPages() + 
public PageReader readAllPages() throws IOException { - List pages = new ArrayList<>(); - ParquetDictionaryPage dictionaryPage = null; + List pages = new ArrayList<>(); + DictionaryPage dictionaryPage = null; long valueCount = 0; while (valueCount < descriptor.getColumnChunkMetaData().getValueCount()) { PageHeader pageHeader = readPageHeader(); @@ -88,7 +88,7 @@ public ParquetPageReader readAllPages() break; } } - return new ParquetPageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage); + return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage); } public int getPosition() @@ -103,10 +103,10 @@ private Slice getSlice(int size) return slice; } - private ParquetDictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) + private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) { DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); - return new ParquetDictionaryPage( + return new DictionaryPage( getSlice(compressedPageSize), uncompressedPageSize, dicHeader.getNum_values(), @@ -116,14 +116,14 @@ private ParquetDictionaryPage readDictionaryPage(PageHeader pageHeader, int unco private long readDataPageV1(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, - List pages) + List pages) { DataPageHeader dataHeaderV1 = pageHeader.getData_page_header(); - pages.add(new ParquetDataPageV1( + pages.add(new DataPageV1( getSlice(compressedPageSize), dataHeaderV1.getNum_values(), uncompressedPageSize, - ParquetMetadataReader.readStats( + MetadataReader.readStats( dataHeaderV1.getStatistics(), descriptor.getColumnDescriptor().getType()), getParquetEncoding(Encoding.valueOf(dataHeaderV1.getRepetition_level_encoding().name())), @@ -135,11 +135,11 @@ private long readDataPageV1(PageHeader pageHeader, private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int 
compressedPageSize, - List pages) + List pages) { DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2(); int dataSize = compressedPageSize - dataHeaderV2.getRepetition_levels_byte_length() - dataHeaderV2.getDefinition_levels_byte_length(); - pages.add(new ParquetDataPageV2( + pages.add(new DataPageV2( dataHeaderV2.getNum_rows(), dataHeaderV2.getNum_nulls(), dataHeaderV2.getNum_values(), @@ -148,7 +148,7 @@ private long readDataPageV2(PageHeader pageHeader, getParquetEncoding(Encoding.valueOf(dataHeaderV2.getEncoding().name())), getSlice(dataSize), uncompressedPageSize, - ParquetMetadataReader.readStats( + MetadataReader.readStats( dataHeaderV2.getStatistics(), descriptor.getColumnDescriptor().getType()), dataHeaderV2.isIs_compressed())); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetReader.java similarity index 89% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetReader.java index 684e60f37f89d..570bd37bd2a98 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ParquetReader.java @@ -11,16 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.Field; -import com.facebook.presto.hive.parquet.GroupField; -import com.facebook.presto.hive.parquet.ParquetCorruptionException; -import com.facebook.presto.hive.parquet.ParquetDataSource; -import com.facebook.presto.hive.parquet.PrimitiveField; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; import com.facebook.presto.memory.context.AggregatedMemoryContext; import com.facebook.presto.memory.context.LocalMemoryContext; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.GroupField; +import com.facebook.presto.parquet.ParquetCorruptionException; +import com.facebook.presto.parquet.ParquetDataSource; +import com.facebook.presto.parquet.PrimitiveField; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.ArrayBlock; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.RowBlock; @@ -45,8 +45,8 @@ import java.util.List; import java.util.Optional; -import static com.facebook.presto.hive.parquet.ParquetValidationUtils.validateParquet; -import static com.facebook.presto.hive.parquet.reader.ParquetListColumnReader.calculateCollectionOffsets; +import static com.facebook.presto.parquet.ParquetValidationUtils.validateParquet; +import static com.facebook.presto.parquet.reader.ListColumnReader.calculateCollectionOffsets; import static com.facebook.presto.spi.type.StandardTypes.ARRAY; import static com.facebook.presto.spi.type.StandardTypes.MAP; import static com.facebook.presto.spi.type.StandardTypes.ROW; @@ -71,7 +71,7 @@ public class ParquetReader private long currentGroupRowCount; private long nextRowInGroup; private int batchSize; - private final ParquetPrimitiveColumnReader[] columnReaders; + private final PrimitiveColumnReader[] columnReaders; private AggregatedMemoryContext currentRowGroupMemoryContext; @@ -85,7 +85,7 @@ 
public ParquetReader(MessageColumnIO messageColumnIO, this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null"); this.currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext(); columns = messageColumnIO.getLeaves(); - columnReaders = new ParquetPrimitiveColumnReader[columns.size()]; + columnReaders = new PrimitiveColumnReader[columns.size()]; } @Override @@ -184,7 +184,7 @@ private ColumnChunk readStruct(GroupField field) blocks[i] = RunLengthEncodedBlock.create(field.getType(), null, columnChunk.getBlock().getPositionCount()); } } - BooleanList structIsNull = ParquetStructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels()); + BooleanList structIsNull = StructColumnReader.calculateStructOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels()); boolean[] structIsNullVector = structIsNull.toBooleanArray(); Block rowBlock = RowBlock.fromFieldBlocks(structIsNullVector.length, Optional.of(structIsNullVector), blocks); return new ColumnChunk(rowBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels()); @@ -194,7 +194,7 @@ private ColumnChunk readPrimitive(PrimitiveField field) throws IOException { ColumnDescriptor columnDescriptor = field.getDescriptor(); - ParquetPrimitiveColumnReader columnReader = columnReaders[field.getId()]; + PrimitiveColumnReader columnReader = columnReaders[field.getId()]; if (columnReader.getPageReader() == null) { validateParquet(currentBlockMetadata.getRowCount() > 0, "Row group has 0 rows"); ColumnChunkMetaData metadata = getColumnChunkMetaData(columnDescriptor); @@ -202,7 +202,7 @@ private ColumnChunk readPrimitive(PrimitiveField field) int totalSize = toIntExact(metadata.getTotalSize()); byte[] buffer = allocateBlock(totalSize); dataSource.readFully(startingPosition, buffer); - ParquetColumnChunkDescriptor descriptor = new ParquetColumnChunkDescriptor(columnDescriptor, 
metadata, totalSize); + ColumnChunkDescriptor descriptor = new ColumnChunkDescriptor(columnDescriptor, metadata, totalSize); ParquetColumnChunk columnChunk = new ParquetColumnChunk(descriptor, buffer, 0); columnReader.setPageReader(columnChunk.readAllPages()); } @@ -232,7 +232,7 @@ private void initializeColumnReaders() { for (PrimitiveColumnIO columnIO : columns) { RichColumnDescriptor column = new RichColumnDescriptor(columnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType()); - columnReaders[columnIO.getId()] = ParquetPrimitiveColumnReader.createReader(column); + columnReaders[columnIO.getId()] = PrimitiveColumnReader.createReader(column); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPrimitiveColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PrimitiveColumnReader.java similarity index 76% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPrimitiveColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PrimitiveColumnReader.java index db60d55af3633..e2ad69008617a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetPrimitiveColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/PrimitiveColumnReader.java @@ -11,17 +11,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.Field; -import com.facebook.presto.hive.parquet.ParquetDataPage; -import com.facebook.presto.hive.parquet.ParquetDataPageV1; -import com.facebook.presto.hive.parquet.ParquetDataPageV2; -import com.facebook.presto.hive.parquet.ParquetDictionaryPage; -import com.facebook.presto.hive.parquet.ParquetEncoding; -import com.facebook.presto.hive.parquet.ParquetTypeUtils; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; -import com.facebook.presto.hive.parquet.dictionary.ParquetDictionary; +import com.facebook.presto.parquet.DataPage; +import com.facebook.presto.parquet.DataPageV1; +import com.facebook.presto.parquet.DataPageV2; +import com.facebook.presto.parquet.DictionaryPage; +import com.facebook.presto.parquet.Field; +import com.facebook.presto.parquet.ParquetEncoding; +import com.facebook.presto.parquet.ParquetTypeUtils; +import com.facebook.presto.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.dictionary.Dictionary; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.DecimalType; @@ -40,16 +40,16 @@ import java.util.Optional; import java.util.function.Consumer; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.createDecimalType; -import static com.facebook.presto.hive.parquet.ParquetValuesType.DEFINITION_LEVEL; -import static com.facebook.presto.hive.parquet.ParquetValuesType.REPETITION_LEVEL; -import static com.facebook.presto.hive.parquet.ParquetValuesType.VALUES; +import static com.facebook.presto.parquet.ParquetTypeUtils.createDecimalType; +import static com.facebook.presto.parquet.ValuesType.DEFINITION_LEVEL; +import static com.facebook.presto.parquet.ValuesType.REPETITION_LEVEL; +import static com.facebook.presto.parquet.ValuesType.VALUES; import static 
com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Verify.verify; import static java.util.Objects.requireNonNull; -public abstract class ParquetPrimitiveColumnReader +public abstract class PrimitiveColumnReader { private static final int EMPTY_LEVEL_VALUE = -1; protected final RichColumnDescriptor columnDescriptor; @@ -59,13 +59,13 @@ public abstract class ParquetPrimitiveColumnReader protected ValuesReader valuesReader; private int nextBatchSize; - private ParquetLevelReader repetitionReader; - private ParquetLevelReader definitionReader; + private LevelReader repetitionReader; + private LevelReader definitionReader; private long totalValueCount; - private ParquetPageReader pageReader; - private ParquetDictionary dictionary; + private PageReader pageReader; + private Dictionary dictionary; private int currentValueCount; - private ParquetDataPage page; + private DataPage page; private int remainingValueCountInPage; private int readOffset; @@ -78,56 +78,56 @@ protected boolean isValueNull() return ParquetTypeUtils.isValueNull(columnDescriptor.isRequired(), definitionLevel, columnDescriptor.getMaxDefinitionLevel()); } - public static ParquetPrimitiveColumnReader createReader(RichColumnDescriptor descriptor) + public static PrimitiveColumnReader createReader(RichColumnDescriptor descriptor) { switch (descriptor.getType()) { case BOOLEAN: - return new ParquetBooleanColumnReader(descriptor); + return new BooleanColumnReader(descriptor); case INT32: - return createDecimalColumnReader(descriptor).orElse(new ParquetIntColumnReader(descriptor)); + return createDecimalColumnReader(descriptor).orElse(new IntColumnReader(descriptor)); case INT64: - return createDecimalColumnReader(descriptor).orElse(new ParquetLongColumnReader(descriptor)); + return createDecimalColumnReader(descriptor).orElse(new LongColumnReader(descriptor)); case INT96: - return new 
ParquetTimestampColumnReader(descriptor); + return new TimestampColumnReader(descriptor); case FLOAT: - return new ParquetFloatColumnReader(descriptor); + return new FloatColumnReader(descriptor); case DOUBLE: - return new ParquetDoubleColumnReader(descriptor); + return new DoubleColumnReader(descriptor); case BINARY: - return createDecimalColumnReader(descriptor).orElse(new ParquetBinaryColumnReader(descriptor)); + return createDecimalColumnReader(descriptor).orElse(new BinaryColumnReader(descriptor)); case FIXED_LEN_BYTE_ARRAY: return createDecimalColumnReader(descriptor) - .orElseThrow(() -> new PrestoException(NOT_SUPPORTED, "Parquet type FIXED_LEN_BYTE_ARRAY supported as DECIMAL; got " + descriptor.getPrimitiveType().getOriginalType())); + .orElseThrow(() -> new PrestoException(NOT_SUPPORTED, "Parquet type FIXED_LEN_BYTE_ARRAY supported as DECIMAL; got " + descriptor.getPrimitiveType().getOriginalType())); default: throw new PrestoException(NOT_SUPPORTED, "Unsupported parquet type: " + descriptor.getType()); } } - private static Optional createDecimalColumnReader(RichColumnDescriptor descriptor) + private static Optional createDecimalColumnReader(RichColumnDescriptor descriptor) { Optional type = createDecimalType(descriptor); if (type.isPresent()) { DecimalType decimalType = (DecimalType) type.get(); - return Optional.of(ParquetDecimalColumnReaderFactory.createReader(descriptor, decimalType.getPrecision(), decimalType.getScale())); + return Optional.of(DecimalColumnReaderFactory.createReader(descriptor, decimalType.getPrecision(), decimalType.getScale())); } return Optional.empty(); } - public ParquetPrimitiveColumnReader(RichColumnDescriptor columnDescriptor) + public PrimitiveColumnReader(RichColumnDescriptor columnDescriptor) { this.columnDescriptor = requireNonNull(columnDescriptor, "columnDescriptor"); pageReader = null; } - public ParquetPageReader getPageReader() + public PageReader getPageReader() { return pageReader; } - public void
setPageReader(ParquetPageReader pageReader) + public void setPageReader(PageReader pageReader) { this.pageReader = requireNonNull(pageReader, "pageReader"); - ParquetDictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); if (dictionaryPage != null) { try { @@ -245,11 +245,11 @@ private boolean readNextPage() return false; } remainingValueCountInPage = page.getValueCount(); - if (page instanceof ParquetDataPageV1) { - valuesReader = readPageV1((ParquetDataPageV1) page); + if (page instanceof DataPageV1) { + valuesReader = readPageV1((DataPageV1) page); } else { - valuesReader = readPageV2((ParquetDataPageV2) page); + valuesReader = readPageV2((DataPageV2) page); } return true; } @@ -264,12 +264,12 @@ private void updateValueCounts(int valuesRead) currentValueCount += valuesRead; } - private ValuesReader readPageV1(ParquetDataPageV1 page) + private ValuesReader readPageV1(DataPageV1 page) { ValuesReader rlReader = page.getRepetitionLevelEncoding().getValuesReader(columnDescriptor, REPETITION_LEVEL); ValuesReader dlReader = page.getDefinitionLevelEncoding().getValuesReader(columnDescriptor, DEFINITION_LEVEL); - repetitionReader = new ParquetLevelValuesReader(rlReader); - definitionReader = new ParquetLevelValuesReader(dlReader); + repetitionReader = new LevelValuesReader(rlReader); + definitionReader = new LevelValuesReader(dlReader); try { byte[] bytes = page.getSlice().getBytes(); rlReader.initFromPage(page.getValueCount(), bytes, 0); @@ -283,19 +283,19 @@ private ValuesReader readPageV1(ParquetDataPageV1 page) } } - private ValuesReader readPageV2(ParquetDataPageV2 page) + private ValuesReader readPageV2(DataPageV2 page) { repetitionReader = buildLevelRLEReader(columnDescriptor.getMaxRepetitionLevel(), page.getRepetitionLevels()); definitionReader = buildLevelRLEReader(columnDescriptor.getMaxDefinitionLevel(), page.getDefinitionLevels()); return initDataReader(page.getDataEncoding(), 
page.getSlice().getBytes(), 0, page.getValueCount()); } - private ParquetLevelReader buildLevelRLEReader(int maxLevel, Slice slice) + private LevelReader buildLevelRLEReader(int maxLevel, Slice slice) { if (maxLevel == 0) { - return new ParquetLevelNullReader(); + return new LevelNullReader(); } - return new ParquetLevelRLEReader(new RunLengthBitPackingHybridDecoder(BytesUtils.getWidthFromMaxInt(maxLevel), new ByteArrayInputStream(slice.getBytes()))); + return new LevelRLEReader(new RunLengthBitPackingHybridDecoder(BytesUtils.getWidthFromMaxInt(maxLevel), new ByteArrayInputStream(slice.getBytes()))); } private ValuesReader initDataReader(ParquetEncoding dataEncoding, byte[] bytes, int offset, int valueCount) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetShortDecimalColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ShortDecimalColumnReader.java similarity index 85% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetShortDecimalColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ShortDecimalColumnReader.java index e3d72532fab0f..db15d58cf7835 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetShortDecimalColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/ShortDecimalColumnReader.java @@ -11,20 +11,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; -import static com.facebook.presto.hive.util.DecimalUtils.getShortDecimalValue; +import static com.facebook.presto.parquet.ParquetTypeUtils.getShortDecimalValue; import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; -public class ParquetShortDecimalColumnReader - extends ParquetPrimitiveColumnReader +public class ShortDecimalColumnReader + extends PrimitiveColumnReader { - ParquetShortDecimalColumnReader(RichColumnDescriptor descriptor) + ShortDecimalColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetStructColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/StructColumnReader.java similarity index 88% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetStructColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/StructColumnReader.java index a6cebcda36c11..da35f4fb3f2b2 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetStructColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/StructColumnReader.java @@ -11,17 +11,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.Field; +import com.facebook.presto.parquet.Field; import it.unimi.dsi.fastutil.booleans.BooleanArrayList; import it.unimi.dsi.fastutil.booleans.BooleanList; -import static com.facebook.presto.hive.parquet.ParquetTypeUtils.isValueNull; +import static com.facebook.presto.parquet.ParquetTypeUtils.isValueNull; -public class ParquetStructColumnReader +public class StructColumnReader { - private ParquetStructColumnReader() + private StructColumnReader() { } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetTimestampColumnReader.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/TimestampColumnReader.java similarity index 78% rename from presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetTimestampColumnReader.java rename to presto-parquet/src/main/java/com/facebook/presto/parquet/reader/TimestampColumnReader.java index 7c79859dac908..9a3820df675b1 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/reader/ParquetTimestampColumnReader.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/reader/TimestampColumnReader.java @@ -11,19 +11,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet.reader; +package com.facebook.presto.parquet.reader; -import com.facebook.presto.hive.parquet.RichColumnDescriptor; +import com.facebook.presto.parquet.RichColumnDescriptor; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.type.Type; import parquet.io.api.Binary; -import static com.facebook.presto.hive.parquet.ParquetTimestampUtils.getTimestampMillis; +import static com.facebook.presto.parquet.ParquetTimestampUtils.getTimestampMillis; -public class ParquetTimestampColumnReader - extends ParquetPrimitiveColumnReader +public class TimestampColumnReader + extends PrimitiveColumnReader { - public ParquetTimestampColumnReader(RichColumnDescriptor descriptor) + public TimestampColumnReader(RichColumnDescriptor descriptor) { super(descriptor); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestParquetTimestampUtils.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestParquetTimestampUtils.java similarity index 87% rename from presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestParquetTimestampUtils.java rename to presto-parquet/src/test/java/com/facebook/presto/parquet/TestParquetTimestampUtils.java index 3bdfd508cfad9..3205c2e663245 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestParquetTimestampUtils.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestParquetTimestampUtils.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; import com.facebook.presto.spi.PrestoException; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; @@ -20,8 +20,8 @@ import java.sql.Timestamp; -import static com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA; -import static com.facebook.presto.hive.parquet.ParquetTimestampUtils.getTimestampMillis; +import static com.facebook.presto.parquet.ParquetTimestampUtils.getTimestampMillis; +import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static org.testng.Assert.assertEquals; public class TestParquetTimestampUtils @@ -42,7 +42,7 @@ public void testInvalidBinaryLength() getTimestampMillis(Binary.fromByteArray(invalidLengthBinaryTimestamp)); } catch (PrestoException e) { - assertEquals(e.getErrorCode(), HIVE_BAD_DATA.toErrorCode()); + assertEquals(e.getErrorCode(), NOT_SUPPORTED.toErrorCode()); assertEquals(e.getMessage(), "Parquet timestamp must be 12 bytes, actual 8"); } } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestTupleDomainParquetPredicate.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java similarity index 94% rename from presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestTupleDomainParquetPredicate.java rename to presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java index e0b6dea1ad06f..e4b53c4aafe7c 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/parquet/TestTupleDomainParquetPredicate.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java @@ -11,10 +11,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.facebook.presto.hive.parquet; +package com.facebook.presto.parquet; -import com.facebook.presto.hive.parquet.predicate.ParquetDictionaryDescriptor; -import com.facebook.presto.hive.parquet.predicate.TupleDomainParquetPredicate; +import com.facebook.presto.parquet.predicate.DictionaryDescriptor; +import com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.predicate.ValueSet; @@ -35,8 +35,8 @@ import java.util.Map; import java.util.Optional; -import static com.facebook.presto.hive.parquet.ParquetEncoding.PLAIN_DICTIONARY; -import static com.facebook.presto.hive.parquet.predicate.TupleDomainParquetPredicate.getDomain; +import static com.facebook.presto.parquet.ParquetEncoding.PLAIN_DICTIONARY; +import static com.facebook.presto.parquet.predicate.TupleDomainParquetPredicate.getDomain; import static com.facebook.presto.spi.predicate.Domain.all; import static com.facebook.presto.spi.predicate.Domain.create; import static com.facebook.presto.spi.predicate.Domain.notNull; @@ -210,8 +210,8 @@ public void testMatchesWithDescriptors() RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column")); TupleDomain effectivePredicate = getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE); TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column)); - ParquetDictionaryPage page = new ParquetDictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY); - assertTrue(parquetPredicate.matches(singletonMap(column, new ParquetDictionaryDescriptor(column, Optional.of(page))))); + DictionaryPage page = new DictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY); + assertTrue(parquetPredicate.matches(singletonMap(column, new 
DictionaryDescriptor(column, Optional.of(page))))); } private TupleDomain getEffectivePredicate(RichColumnDescriptor column, VarcharType type, Slice value) diff --git a/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveStorageFormats.java b/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveStorageFormats.java index ef2b01ca523cf..9b8ddc458863c 100644 --- a/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveStorageFormats.java +++ b/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveStorageFormats.java @@ -50,8 +50,7 @@ public static Object[][] storageFormats() {storageFormat("ORC", ImmutableMap.of("hive.orc_optimized_writer_enabled", "false"))}, {storageFormat("ORC", ImmutableMap.of("hive.orc_optimized_writer_enabled", "true", "hive.orc_optimized_writer_validate", "true"))}, {storageFormat("DWRF")}, - {storageFormat("PARQUET", ImmutableMap.of("hive.parquet_optimized_reader_enabled", "false"))}, - {storageFormat("PARQUET", ImmutableMap.of("hive.parquet_optimized_reader_enabled", "true"))}, + {storageFormat("PARQUET")}, {storageFormat("RCBINARY", ImmutableMap.of("hive.rcfile_optimized_writer_enabled", "false", "hive.rcfile_optimized_writer_validate", "false"))}, {storageFormat("RCBINARY", ImmutableMap.of("hive.rcfile_optimized_writer_enabled", "true", "hive.rcfile_optimized_writer_validate", "true"))}, {storageFormat("RCTEXT", ImmutableMap.of("hive.rcfile_optimized_writer_enabled", "false", "hive.rcfile_optimized_writer_validate", "false"))},