diff --git a/pom.xml b/pom.xml index ffe041931075b..880cff4ec3161 100644 --- a/pom.xml +++ b/pom.xml @@ -608,7 +608,7 @@ com.facebook.presto.orc orc-protobuf - 11 + 12 diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java index de390c2f0aa5a..566fa5f3ac8c3 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java @@ -194,7 +194,7 @@ public static List toHiveTypes(String hiveTypes) .collect(toList())); } - private static HiveType toHiveType(TypeInfo typeInfo) + public static HiveType toHiveType(TypeInfo typeInfo) { requireNonNull(typeInfo, "typeInfo is null"); return new HiveType(typeInfo); diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java index 58cef8f596730..a0e7eecbc5acf 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java @@ -432,7 +432,7 @@ public synchronized void replaceTable(MetastoreContext metastoreContext, String checkArgument(!newTable.getTableType().equals(TEMPORARY_TABLE), "temporary tables must never be stored in the metastore"); Table table = getRequiredTable(metastoreContext, databaseName, tableName); - if (!table.getTableType().equals(VIRTUAL_VIEW) || !newTable.getTableType().equals(VIRTUAL_VIEW)) { + if ((!table.getTableType().equals(VIRTUAL_VIEW) || !newTable.getTableType().equals(VIRTUAL_VIEW)) && !isIcebergTable(table.getParameters())) { throw new PrestoException(HIVE_METASTORE_ERROR, "Only views can be updated with replaceTable"); } if (!table.getDatabaseName().equals(databaseName) || 
!table.getTableName().equals(tableName)) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java index 8752c5e7bfcc3..695951e310380 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java @@ -29,6 +29,7 @@ import com.facebook.presto.orc.OrcWriterOptions; import com.facebook.presto.orc.WriterStats; import com.facebook.presto.orc.metadata.CompressionKind; +import com.facebook.presto.orc.metadata.OrcType; import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableList; import org.joda.time.DateTimeZone; @@ -58,7 +59,7 @@ public class OrcFileWriter private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcFileWriter.class).instanceSize(); private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean(); - private final OrcWriter orcWriter; + protected final OrcWriter orcWriter; private final Callable rollbackAction; private final int[] fileInputColumnIndexes; private final List nullBlocks; @@ -83,6 +84,43 @@ public OrcFileWriter( WriterStats stats, DwrfEncryptionProvider dwrfEncryptionProvider, Optional dwrfWriterEncryption) + { + this( + dataSink, + rollbackAction, + orcEncoding, + columnNames, + fileColumnTypes, + Optional.empty(), + compression, + options, + fileInputColumnIndexes, + metadata, + hiveStorageTimeZone, + validationInputFactory, + validationMode, + stats, + dwrfEncryptionProvider, + dwrfWriterEncryption); + } + + public OrcFileWriter( + DataSink dataSink, + Callable rollbackAction, + OrcEncoding orcEncoding, + List columnNames, + List fileColumnTypes, + Optional> fileColumnOrcTypes, + CompressionKind compression, + OrcWriterOptions options, + int[] fileInputColumnIndexes, + Map metadata, + DateTimeZone hiveStorageTimeZone, + Optional> validationInputFactory, + OrcWriteValidationMode 
validationMode, + WriterStats stats, + DwrfEncryptionProvider dwrfEncryptionProvider, + Optional dwrfWriterEncryption) { requireNonNull(dataSink, "dataSink is null"); @@ -91,6 +129,7 @@ public OrcFileWriter( dataSink, columnNames, fileColumnTypes, + fileColumnOrcTypes, orcEncoding, compression, dwrfWriterEncryption, diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml index 3df057236cac8..5cf4e4fc8a0f4 100644 --- a/presto-iceberg/pom.xml +++ b/presto-iceberg/pom.xml @@ -21,11 +21,6 @@ concurrent - - com.facebook.presto - presto-client - - com.facebook.presto presto-hive-common @@ -104,6 +99,11 @@ + + com.facebook.presto + presto-orc + + com.facebook.presto presto-plugin-toolkit diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java new file mode 100644 index 0000000000000..898848c7a2b4f --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java @@ -0,0 +1,187 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.common.Page; +import com.facebook.presto.common.predicate.TupleDomain; +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.common.type.StandardTypes; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.common.type.TypeSignatureParameter; +import com.facebook.presto.iceberg.util.PageListBuilder; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.ConnectorPageSource; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.FixedPageSource; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.SystemTable; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.slice.Slices; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableScan; +import org.apache.iceberg.transforms.Transforms; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.iceberg.IcebergUtil.getTableScan; +import static com.facebook.presto.iceberg.util.PageListBuilder.forTable; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.util.Objects.requireNonNull; + +public class FilesTable + implements SystemTable +{ + 
private final ConnectorTableMetadata tableMetadata; + private final Table icebergTable; + private final Optional snapshotId; + + public FilesTable(SchemaTableName tableName, Table icebergTable, Optional snapshotId, TypeManager typeManager) + { + this.icebergTable = requireNonNull(icebergTable, "icebergTable is null"); + + tableMetadata = new ConnectorTableMetadata(requireNonNull(tableName, "tableName is null"), + ImmutableList.builder() + .add(new ColumnMetadata("file_path", VARCHAR)) + .add(new ColumnMetadata("file_format", VARCHAR)) + .add(new ColumnMetadata("record_count", BIGINT)) + .add(new ColumnMetadata("file_size_in_bytes", BIGINT)) + .add(new ColumnMetadata("column_sizes", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + TypeSignatureParameter.of(INTEGER.getTypeSignature()), + TypeSignatureParameter.of(BIGINT.getTypeSignature()))))) + .add(new ColumnMetadata("value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + TypeSignatureParameter.of(INTEGER.getTypeSignature()), + TypeSignatureParameter.of(BIGINT.getTypeSignature()))))) + .add(new ColumnMetadata("null_value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + TypeSignatureParameter.of(INTEGER.getTypeSignature()), + TypeSignatureParameter.of(BIGINT.getTypeSignature()))))) + .add(new ColumnMetadata("lower_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + TypeSignatureParameter.of(INTEGER.getTypeSignature()), + TypeSignatureParameter.of(VARCHAR.getTypeSignature()))))) + .add(new ColumnMetadata("upper_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + TypeSignatureParameter.of(INTEGER.getTypeSignature()), + TypeSignatureParameter.of(VARCHAR.getTypeSignature()))))) + .add(new ColumnMetadata("key_metadata", VARBINARY)) + .add(new ColumnMetadata("split_offsets", new ArrayType(BIGINT))) + .build()); + this.snapshotId = requireNonNull(snapshotId, "snapshotId is null"); 
+ } + + @Override + public Distribution getDistribution() + { + return Distribution.SINGLE_COORDINATOR; + } + + @Override + public ConnectorTableMetadata getTableMetadata() + { + return tableMetadata; + } + + @Override + public ConnectorPageSource pageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain constraint) + { + return new FixedPageSource(buildPages(tableMetadata, session, icebergTable, snapshotId)); + } + + private static List buildPages(ConnectorTableMetadata tableMetadata, ConnectorSession session, Table icebergTable, Optional snapshotId) + { + PageListBuilder pagesBuilder = forTable(tableMetadata); + TableScan tableScan = getTableScan(TupleDomain.all(), snapshotId, icebergTable).includeColumnStats(); + Map idToTypeMap = getIdToTypeMap(icebergTable.schema()); + + tableScan.planFiles().forEach(fileScanTask -> { + DataFile dataFile = fileScanTask.file(); + pagesBuilder.beginRow(); + pagesBuilder.appendVarchar(dataFile.path().toString()); + pagesBuilder.appendVarchar(dataFile.format().name()); + pagesBuilder.appendBigint(dataFile.recordCount()); + pagesBuilder.appendBigint(dataFile.fileSizeInBytes()); + if (checkNonNull(dataFile.columnSizes(), pagesBuilder)) { + pagesBuilder.appendIntegerBigintMap(dataFile.columnSizes()); + } + if (checkNonNull(dataFile.valueCounts(), pagesBuilder)) { + pagesBuilder.appendIntegerBigintMap(dataFile.valueCounts()); + } + if (checkNonNull(dataFile.nullValueCounts(), pagesBuilder)) { + pagesBuilder.appendIntegerBigintMap(dataFile.nullValueCounts()); + } + if (checkNonNull(dataFile.lowerBounds(), pagesBuilder)) { + pagesBuilder.appendIntegerVarcharMap(dataFile.lowerBounds().entrySet().stream() + .collect(toImmutableMap( + Map.Entry::getKey, + entry -> Transforms.identity(idToTypeMap.get(entry.getKey())).toHumanString( + Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue()))))); + } + if (checkNonNull(dataFile.upperBounds(), pagesBuilder)) { + 
pagesBuilder.appendIntegerVarcharMap(dataFile.upperBounds().entrySet().stream() + .collect(toImmutableMap( + Map.Entry::getKey, + entry -> Transforms.identity(idToTypeMap.get(entry.getKey())).toHumanString( + Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue()))))); + } + if (checkNonNull(dataFile.keyMetadata(), pagesBuilder)) { + pagesBuilder.appendVarbinary(Slices.wrappedBuffer(dataFile.keyMetadata())); + } + if (checkNonNull(dataFile.splitOffsets(), pagesBuilder)) { + pagesBuilder.appendBigintArray(dataFile.splitOffsets()); + } + pagesBuilder.endRow(); + }); + + return pagesBuilder.build(); + } + + private static Map getIdToTypeMap(Schema schema) + { + ImmutableMap.Builder idToTypeMap = ImmutableMap.builder(); + for (Types.NestedField field : schema.columns()) { + populateIdToTypeMap(field, idToTypeMap); + } + return idToTypeMap.build(); + } + + private static void populateIdToTypeMap(Types.NestedField field, ImmutableMap.Builder idToTypeMap) + { + Type type = field.type(); + idToTypeMap.put(field.fieldId(), type); + if (type instanceof Type.NestedType) { + type.asNestedType().fields().forEach(child -> populateIdToTypeMap(child, idToTypeMap)); + } + } + + private static boolean checkNonNull(Object object, PageListBuilder pagesBuilder) + { + if (object == null) { + pagesBuilder.appendNull(); + return false; + } + return true; + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java index 10c78108078d5..c7fcd5904ab65 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java @@ -13,14 +13,26 @@ */ package com.facebook.presto.iceberg; +import com.facebook.presto.common.io.DataSink; +import com.facebook.presto.common.io.OutputStreamDataSink; import 
com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.hive.FileFormatDataSourceStats; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveDwrfEncryptionProvider; +import com.facebook.presto.hive.HiveSessionProperties; +import com.facebook.presto.hive.NodeVersion; +import com.facebook.presto.hive.OrcFileWriterConfig; +import com.facebook.presto.hive.orc.HdfsOrcDataSource; +import com.facebook.presto.orc.DwrfEncryptionProvider; +import com.facebook.presto.orc.OrcDataSource; +import com.facebook.presto.orc.OrcDataSourceId; +import com.facebook.presto.orc.OrcWriterStats; import com.facebook.presto.parquet.writer.ParquetWriterOptions; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; +import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; @@ -32,35 +44,58 @@ import java.io.IOException; import java.util.List; +import java.util.Optional; import java.util.concurrent.Callable; +import java.util.function.Supplier; import java.util.stream.IntStream; +import static com.facebook.presto.hive.HiveMetadata.PRESTO_VERSION_NAME; import static com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize; import static com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize; +import static com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME; import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR; +import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITE_VALIDATION_FAILED; import static com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize; +import static 
com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcOptimizedWriterValidateMode; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize; +import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcOptimizedWriterValidate; +import static com.facebook.presto.iceberg.TypeConverter.toOrcType; import static com.facebook.presto.iceberg.TypeConverter.toPrestoType; import static com.facebook.presto.iceberg.util.PrimitiveTypeMapBuilder.makeTypeMap; +import static com.facebook.presto.orc.OrcEncoding.ORC; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.util.Objects.requireNonNull; import static org.apache.iceberg.parquet.ParquetSchemaUtil.convert; +import static org.joda.time.DateTimeZone.UTC; public class IcebergFileWriterFactory { private final HdfsEnvironment hdfsEnvironment; private final TypeManager typeManager; private final FileFormatDataSourceStats readStats; + private final NodeVersion nodeVersion; + private final OrcWriterStats orcWriterStats = new OrcWriterStats(); + private final OrcFileWriterConfig orcFileWriterConfig; + private final DwrfEncryptionProvider dwrfEncryptionProvider; @Inject public IcebergFileWriterFactory( HdfsEnvironment hdfsEnvironment, TypeManager typeManager, - FileFormatDataSourceStats readStats) + FileFormatDataSourceStats readStats, + NodeVersion nodeVersion, + OrcFileWriterConfig orcFileWriterConfig, + HiveDwrfEncryptionProvider dwrfEncryptionProvider) { this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.readStats = requireNonNull(readStats, "readStats is null"); + this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null"); + this.orcFileWriterConfig = 
requireNonNull(orcFileWriterConfig, "orcFileWriterConfig is null"); + this.dwrfEncryptionProvider = requireNonNull(dwrfEncryptionProvider, "DwrfEncryptionProvider is null").toDwrfEncryptionProvider(); } public IcebergFileWriter createFileWriter( @@ -72,9 +107,10 @@ public IcebergFileWriter createFileWriter( FileFormat fileFormat) { switch (fileFormat) { - // TODO: support ORC case PARQUET: return createParquetWriter(outputPath, icebergSchema, jobConf, session, hdfsContext); + case ORC: + return createOrcWriter(outputPath, icebergSchema, jobConf, session); } throw new PrestoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat); } @@ -124,4 +160,81 @@ private IcebergFileWriter createParquetWriter( throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e); } } + + private IcebergFileWriter createOrcWriter( + Path outputPath, + Schema icebergSchema, + JobConf jobConf, + ConnectorSession session) + { + try { + FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf); + DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath))); + Callable rollbackAction = () -> { + hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false)); + return null; + }; + + List columnFields = icebergSchema.columns(); + List fileColumnNames = columnFields.stream() + .map(Types.NestedField::name) + .collect(toImmutableList()); + List fileColumnTypes = columnFields.stream() + .map(Types.NestedField::type) + .map(type -> toPrestoType(type, typeManager)) + .collect(toImmutableList()); + + Optional> validationInputFactory = Optional.empty(); + if (isOrcOptimizedWriterValidate(session)) { + validationInputFactory = Optional.of(() -> { + try { + return new HdfsOrcDataSource( + new OrcDataSourceId(outputPath.toString()), + hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()), + 
getOrcMaxMergeDistance(session), + getOrcMaxBufferSize(session), + getOrcStreamBufferSize(session), + false, + hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)), + readStats); + } + catch (IOException e) { + throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e); + } + }); + } + + return new IcebergOrcFileWriter( + icebergSchema, + orcDataSink, + rollbackAction, + ORC, + fileColumnNames, + fileColumnTypes, + toOrcType(icebergSchema), + getCompressionCodec(session).getOrcCompressionKind(), + orcFileWriterConfig + .toOrcWriterOptionsBuilder() + .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session)) + .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session)) + .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session)) + .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session)) + .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session)) + .build(), + IntStream.range(0, fileColumnNames.size()).toArray(), + ImmutableMap.builder() + .put(PRESTO_VERSION_NAME, nodeVersion.toString()) + .put(PRESTO_QUERY_ID_NAME, session.getQueryId()) + .build(), + UTC, + validationInputFactory, + getOrcOptimizedWriterValidateMode(session), + orcWriterStats, + dwrfEncryptionProvider, + Optional.empty()); + } + catch (IOException e) { + throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e); + } + } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java index 6aa1a6235e345..500c45d9757f9 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java @@ -104,6 +104,7 @@ import static com.google.common.base.Verify.verify; import static 
com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.util.Collections.singletonList; import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; import static java.util.stream.Collectors.toList; @@ -200,7 +201,7 @@ private Optional getRawSystemTable(ConnectorSession session, Schema return Optional.empty(); } - org.apache.iceberg.Table table = getIcebergTable(metastore, hdfsEnvironment, session, tableName); + org.apache.iceberg.Table table = getIcebergTable(metastore, hdfsEnvironment, session, new SchemaTableName(tableName.getSchemaName(), name.getTableName())); SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType()); switch (name.getTableType()) { @@ -220,6 +221,8 @@ private Optional getRawSystemTable(ConnectorSession session, Schema return Optional.of(new PartitionTable(systemTableName, typeManager, table, getSnapshotId(table, name.getSnapshotId()))); case MANIFESTS: return Optional.of(new ManifestsTable(systemTableName, table, getSnapshotId(table, name.getSnapshotId()))); + case FILES: + return Optional.of(new FilesTable(systemTableName, table, getSnapshotId(table, name.getSnapshotId()), typeManager)); } return Optional.empty(); } @@ -261,7 +264,7 @@ public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTable @Override public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { - List tables = listTables(session, Optional.of(prefix.getSchemaName())); + List tables = prefix.getTableName() != null ? 
singletonList(prefix.toSchemaTableName()) : listTables(session, Optional.of(prefix.getSchemaName())); ImmutableMap.Builder> columns = ImmutableMap.builder(); for (SchemaTableName table : tables) { diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java index 4a42e1a9f50cf..2312dc68b25f4 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java @@ -21,6 +21,7 @@ import com.facebook.presto.cache.NoOpCacheManager; import com.facebook.presto.cache.filemerge.FileMergeCacheConfig; import com.facebook.presto.cache.filemerge.FileMergeCacheManager; +import com.facebook.presto.hive.CacheStatsMBean; import com.facebook.presto.hive.DynamicConfigurationProvider; import com.facebook.presto.hive.FileFormatDataSourceStats; import com.facebook.presto.hive.ForCachingHiveMetastore; @@ -29,9 +30,11 @@ import com.facebook.presto.hive.HdfsConfigurationInitializer; import com.facebook.presto.hive.HdfsEnvironment; import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.HiveDwrfEncryptionProvider; import com.facebook.presto.hive.HiveHdfsConfiguration; import com.facebook.presto.hive.HiveNodePartitioningProvider; import com.facebook.presto.hive.MetastoreClientConfig; +import com.facebook.presto.hive.OrcFileWriterConfig; import com.facebook.presto.hive.ParquetFileWriterConfig; import com.facebook.presto.hive.PartitionMutator; import com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration; @@ -42,16 +45,32 @@ import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; import com.facebook.presto.hive.metastore.HivePartitionMutator; import com.facebook.presto.hive.metastore.MetastoreConfig; +import com.facebook.presto.orc.CachingStripeMetadataSource; +import com.facebook.presto.orc.EncryptionLibrary; +import 
com.facebook.presto.orc.OrcDataSourceId; +import com.facebook.presto.orc.StorageStripeMetadataSource; +import com.facebook.presto.orc.StripeMetadataSource; +import com.facebook.presto.orc.StripeReader; +import com.facebook.presto.orc.UnsupportedEncryptionLibrary; +import com.facebook.presto.orc.cache.CachingOrcFileTailSource; +import com.facebook.presto.orc.cache.OrcCacheConfig; +import com.facebook.presto.orc.cache.OrcFileTailSource; +import com.facebook.presto.orc.cache.StorageOrcFileTailSource; +import com.facebook.presto.orc.metadata.OrcFileTail; import com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider; import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; import com.facebook.presto.spi.connector.ConnectorPageSourceProvider; import com.facebook.presto.spi.connector.ConnectorSplitManager; import com.facebook.presto.spi.procedure.Procedure; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Provides; import com.google.inject.Scopes; import com.google.inject.multibindings.Multibinder; +import io.airlift.slice.Slice; +import org.weakref.jmx.MBeanExporter; import org.weakref.jmx.testing.TestingMBeanServer; import javax.inject.Singleton; @@ -64,13 +83,23 @@ import static com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder; import static com.facebook.presto.cache.CacheType.FILE_MERGE; import static com.google.inject.multibindings.Multibinder.newSetBinder; +import static java.lang.Math.toIntExact; import static java.util.concurrent.Executors.newFixedThreadPool; import static java.util.concurrent.Executors.newScheduledThreadPool; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.weakref.jmx.ObjectNames.generatedNameOf; import static org.weakref.jmx.guice.ExportBinder.newExporter; public class IcebergModule implements Module { + private final String connectorId; + + public 
IcebergModule(String connectorId) + { + this.connectorId = connectorId; + } + @Override public void configure(Binder binder) { @@ -119,6 +148,14 @@ public void configure(Binder binder) Multibinder procedures = newSetBinder(binder, Procedure.class); procedures.addBinding().toProvider(RollbackToSnapshotProcedure.class).in(Scopes.SINGLETON); + + // for orc + binder.bind(EncryptionLibrary.class).annotatedWith(HiveDwrfEncryptionProvider.ForCryptoService.class).to(UnsupportedEncryptionLibrary.class).in(Scopes.SINGLETON); + binder.bind(EncryptionLibrary.class).annotatedWith(HiveDwrfEncryptionProvider.ForUnknown.class).to(UnsupportedEncryptionLibrary.class).in(Scopes.SINGLETON); + binder.bind(HiveDwrfEncryptionProvider.class).in(Scopes.SINGLETON); + + configBinder(binder).bindConfig(OrcCacheConfig.class, connectorId); + configBinder(binder).bindConfig(OrcFileWriterConfig.class); } @ForCachingHiveMetastore @@ -146,4 +183,50 @@ public CacheManager createCacheManager(CacheConfig cacheConfig, FileMergeCacheCo } return new NoOpCacheManager(); } + + @Singleton + @Provides + public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) + { + OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(); + if (orcCacheConfig.isFileTailCacheEnabled()) { + Cache cache = CacheBuilder.newBuilder() + .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes()) + .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize()) + .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS) + .recordStats() + .build(); + CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache); + orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache); + exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean); + } + return orcFileTailSource; + } + + @Singleton + @Provides + public StripeMetadataSource 
createStripeMetadataSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) + { + StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource(); + if (orcCacheConfig.isStripeMetadataCacheEnabled()) { + Cache footerCache = CacheBuilder.newBuilder() + .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()) + .weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize())) + .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS) + .recordStats() + .build(); + Cache streamCache = CacheBuilder.newBuilder() + .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()) + .weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize())) + .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS) + .recordStats() + .build(); + CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache); + CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache); + stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache); + exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean); + exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean); + } + return stripeMetadataSource; + } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java new file mode 100644 index 0000000000000..5ce39b6b90426 --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java @@ -0,0 +1,144 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.hive.HiveColumnHandle.ColumnType; +import com.facebook.presto.orc.metadata.OrcType.OrcTypeKind; +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +public class IcebergOrcColumn +{ + public static final int ROOT_COLUMN_ID = 0; + + private int orcColumnId; + private int orcFieldTypeIndex; + private Optional icebergColumnId; + private String columnName; + private ColumnType columnType; + private OrcTypeKind orcType; + private Map attributes; + + public IcebergOrcColumn( + int orcColumnId, + int orcFieldTypeIndex, + Optional icebergColumnId, + String columnName, + ColumnType columnType, + OrcTypeKind orcType, + Map attributes) + { + checkArgument(orcColumnId >= 0, "orcColumnId is negative"); + checkArgument(orcFieldTypeIndex >= 0, "orcFieldTypeIndex is negative"); + this.orcColumnId = orcColumnId; + this.orcFieldTypeIndex = orcFieldTypeIndex; + this.icebergColumnId = requireNonNull(icebergColumnId, "icebergColumnId is null"); + this.columnName = requireNonNull(columnName, "columnName is null"); + this.columnType = requireNonNull(columnType, "columnType is null"); + this.orcType = requireNonNull(orcType, "orcType is null"); + + this.attributes = ImmutableMap.copyOf(requireNonNull(attributes, "attributes is null")); + } + + public int getOrcColumnId() + { + return orcColumnId; + } + + public int 
getOrcFieldTypeIndex() + { + return orcFieldTypeIndex; + } + + public Optional getIcebergColumnId() + { + return icebergColumnId; + } + + public IcebergOrcColumn setIcebergColumnId(Optional icebergColumnId) + { + this.icebergColumnId = requireNonNull(icebergColumnId, "icebergColumnId is null"); + return this; + } + + public String getColumnName() + { + return columnName; + } + + public IcebergOrcColumn setColumnName(String columnName) + { + this.columnName = requireNonNull(columnName, "columnName is null"); + return this; + } + + public ColumnType getColumnType() + { + return columnType; + } + + public IcebergOrcColumn setColumnType(ColumnType columnType) + { + this.columnType = requireNonNull(columnType, "columnType is null"); + return this; + } + + public OrcTypeKind getOrcType() + { + return orcType; + } + + public IcebergOrcColumn setOrcType(OrcTypeKind orcType) + { + this.orcType = requireNonNull(orcType, "orcType is null"); + return this; + } + + public Map getAttributes() + { + return attributes; + } + + public static IcebergOrcColumn copy(IcebergOrcColumn other) + { + requireNonNull(other, "copy from other IcebergOrcColumn is null"); + return new IcebergOrcColumn( + other.getOrcColumnId(), + other.getOrcFieldTypeIndex(), + other.getIcebergColumnId(), + other.getColumnName(), + other.getColumnType(), + other.getOrcType(), + other.getAttributes()); + } + + @Override + public String toString() + { + return "IcebergOrcColumn{" + + "orcColumnId=" + orcColumnId + + ", orcFieldTypeIndex=" + orcFieldTypeIndex + + ", icebergColumnId=" + icebergColumnId + + ", columnName='" + columnName + '\'' + + ", columnType=" + columnType + + ", orcType=" + orcType + + ", attributes=" + attributes + + '}'; + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java new file mode 100644 index 0000000000000..b0de928df3020 --- /dev/null +++ 
b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java @@ -0,0 +1,259 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.common.io.DataSink; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.hive.OrcFileWriter; +import com.facebook.presto.orc.DwrfEncryptionProvider; +import com.facebook.presto.orc.DwrfWriterEncryption; +import com.facebook.presto.orc.OrcDataSource; +import com.facebook.presto.orc.OrcEncoding; +import com.facebook.presto.orc.OrcWriteValidation; +import com.facebook.presto.orc.OrcWriterOptions; +import com.facebook.presto.orc.OrcWriterStats; +import com.facebook.presto.orc.metadata.CompressionKind; +import com.facebook.presto.orc.metadata.OrcType; +import com.facebook.presto.orc.metadata.statistics.ColumnStatistics; +import com.facebook.presto.orc.metadata.statistics.DateStatistics; +import com.facebook.presto.orc.metadata.statistics.DecimalStatistics; +import com.facebook.presto.orc.metadata.statistics.DoubleStatistics; +import com.facebook.presto.orc.metadata.statistics.IntegerStatistics; +import com.facebook.presto.orc.metadata.statistics.StringStatistics; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import io.airlift.slice.Slice; +import org.apache.iceberg.Metrics; +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Conversions; +import 
org.joda.time.DateTimeZone; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.function.Supplier; + +import static com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY; +import static com.google.common.base.Verify.verify; +import static java.lang.Math.toIntExact; +import static java.util.Objects.requireNonNull; +import static org.apache.iceberg.types.Types.DecimalType; +import static org.apache.iceberg.types.Types.NestedField; + +public class IcebergOrcFileWriter + extends OrcFileWriter + implements IcebergFileWriter +{ + private final Schema icebergSchema; + private final List orcColumn; + + public IcebergOrcFileWriter( + Schema icebergSchema, + DataSink dataSink, + Callable rollbackAction, + OrcEncoding orcEncoding, + List columnNames, + List fileColumnTypes, + List fileColumnOrcTypes, + CompressionKind compression, + OrcWriterOptions options, + int[] fileInputColumnIndexes, + Map metadata, + DateTimeZone hiveStorageTimeZone, + Optional> validationInputFactory, + OrcWriteValidation.OrcWriteValidationMode validationMode, + OrcWriterStats stats, + DwrfEncryptionProvider dwrfEncryptionProvider, + Optional dwrfWriterEncryption) + { + super(dataSink, rollbackAction, orcEncoding, columnNames, fileColumnTypes, Optional.ofNullable(fileColumnOrcTypes), compression, options, fileInputColumnIndexes, metadata, hiveStorageTimeZone, validationInputFactory, validationMode, stats, dwrfEncryptionProvider, dwrfWriterEncryption); + this.icebergSchema = requireNonNull(icebergSchema, "icebergSchema is null"); + this.orcColumn = fileColumnOrcTypes; + } + + @Override + public Metrics getMetrics() + { + return computeMetrics(icebergSchema, orcColumn, orcWriter.getFileRowCount(), orcWriter.getFileStats()); + } + + private static Metrics computeMetrics(Schema icebergSchema, List orcRowTypes, long fileRowCount, List 
columnStatistics) + { + if (columnStatistics.isEmpty()) { + return new Metrics(fileRowCount, null, null, null, null, null); + } + // Columns that are descendants of LIST or MAP types are excluded because: + // 1. Their stats are not used by Apache Iceberg to filter out data files + // 2. Their record count can be larger than table-level row count. There's no good way to calculate nullCounts for them. + // See https://github.com/apache/iceberg/pull/199#discussion_r429443627 + Set excludedColumns = getExcludedColumns(orcRowTypes); + + ImmutableMap.Builder valueCountsBuilder = ImmutableMap.builder(); + ImmutableMap.Builder nullCountsBuilder = ImmutableMap.builder(); + ImmutableMap.Builder lowerBoundsBuilder = ImmutableMap.builder(); + ImmutableMap.Builder upperBoundsBuilder = ImmutableMap.builder(); + + // OrcColumnId(0) is the root column that represents file-level schema + for (int i = 1; i < orcRowTypes.size(); i++) { + if (excludedColumns.contains(i)) { + continue; + } + OrcType orcColumn = orcRowTypes.get(i); + ColumnStatistics orcColumnStats = columnStatistics.get(i); + int icebergId = getIcebergId(orcColumn); + NestedField icebergField = icebergSchema.findField(icebergId); + verify(icebergField != null, "Cannot find Iceberg column with ID %s in schema %s", icebergId, icebergSchema); + valueCountsBuilder.put(icebergId, fileRowCount); + if (orcColumnStats.hasNumberOfValues()) { + nullCountsBuilder.put(icebergId, fileRowCount - orcColumnStats.getNumberOfValues()); + } + toIcebergMinMax(orcColumnStats, icebergField.type()).ifPresent(minMax -> { + lowerBoundsBuilder.put(icebergId, minMax.getMin()); + upperBoundsBuilder.put(icebergId, minMax.getMax()); + }); + } + Map valueCounts = valueCountsBuilder.build(); + Map nullCounts = nullCountsBuilder.build(); + Map lowerBounds = lowerBoundsBuilder.build(); + Map upperBounds = upperBoundsBuilder.build(); + return new Metrics( + fileRowCount, + null, // TODO: Add column size accounting to ORC column writers + 
valueCounts.isEmpty() ? null : valueCounts, + nullCounts.isEmpty() ? null : nullCounts, + lowerBounds.isEmpty() ? null : lowerBounds, + upperBounds.isEmpty() ? null : upperBounds); + } + + private static Set getExcludedColumns(List orcRowTypes) + { + ImmutableSet.Builder excludedColumns = ImmutableSet.builder(); + populateExcludedColumns(orcRowTypes, 0, false, excludedColumns); + return excludedColumns.build(); + } + + private static void populateExcludedColumns(List orcRowTypes, int orcColumnId, boolean exclude, ImmutableSet.Builder excludedColumns) + { + if (exclude) { + excludedColumns.add(orcColumnId); + } + OrcType orcColumn = orcRowTypes.get(orcColumnId); + switch (orcColumn.getOrcTypeKind()) { + case LIST: + case MAP: + for (Integer child : orcColumn.getFieldTypeIndexes()) { + populateExcludedColumns(orcRowTypes, child, true, excludedColumns); + } + return; + case STRUCT: + for (Integer child : orcColumn.getFieldTypeIndexes()) { + populateExcludedColumns(orcRowTypes, child, exclude, excludedColumns); + } + return; + } + } + + private static int getIcebergId(OrcType orcColumn) + { + String icebergId = orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY); + verify(icebergId != null, "ORC column %s doesn't have an associated Iceberg ID", orcColumn); + return Integer.parseInt(icebergId); + } + + private static Optional toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType) + { + IntegerStatistics integerStatistics = orcColumnStats.getIntegerStatistics(); + if (integerStatistics != null) { + Object min = integerStatistics.getMin(); + Object max = integerStatistics.getMax(); + if (min == null || max == null) { + return Optional.empty(); + } + if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) { + min = toIntExact((Long) min); + max = toIntExact((Long) max); + } + return Optional.of(new IcebergMinMax(icebergType, min, max)); + } + DoubleStatistics doubleStatistics = orcColumnStats.getDoubleStatistics(); + if 
(doubleStatistics != null) { + Object min = doubleStatistics.getMin(); + Object max = doubleStatistics.getMax(); + if (min == null || max == null) { + return Optional.empty(); + } + if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT) { + min = ((Double) min).floatValue(); + max = ((Double) max).floatValue(); + } + return Optional.of(new IcebergMinMax(icebergType, min, max)); + } + StringStatistics stringStatistics = orcColumnStats.getStringStatistics(); + if (stringStatistics != null) { + Slice min = stringStatistics.getMin(); + Slice max = stringStatistics.getMax(); + if (min == null || max == null) { + return Optional.empty(); + } + return Optional.of(new IcebergMinMax(icebergType, min.toStringUtf8(), max.toStringUtf8())); + } + DateStatistics dateStatistics = orcColumnStats.getDateStatistics(); + if (dateStatistics != null) { + Integer min = dateStatistics.getMin(); + Integer max = dateStatistics.getMax(); + if (min == null || max == null) { + return Optional.empty(); + } + return Optional.of(new IcebergMinMax(icebergType, min, max)); + } + DecimalStatistics decimalStatistics = orcColumnStats.getDecimalStatistics(); + if (decimalStatistics != null) { + BigDecimal min = decimalStatistics.getMin(); + BigDecimal max = decimalStatistics.getMax(); + if (min == null || max == null) { + return Optional.empty(); + } + min = min.setScale(((DecimalType) icebergType).scale()); + max = max.setScale(((DecimalType) icebergType).scale()); + return Optional.of(new IcebergMinMax(icebergType, min, max)); + } + return Optional.empty(); + } + + private static class IcebergMinMax + { + private ByteBuffer min; + private ByteBuffer max; + + private IcebergMinMax(org.apache.iceberg.types.Type type, Object min, Object max) + { + this.min = Conversions.toByteBuffer(type, min); + this.max = Conversions.toByteBuffer(type, max); + } + + public ByteBuffer getMin() + { + return min; + } + + public ByteBuffer getMax() + { + return max; + } + } +} diff --git 
a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java index ce0a75780b08c..ac25910b31e4d 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java @@ -13,16 +13,40 @@ */ package com.facebook.presto.iceberg; +import com.facebook.presto.common.RuntimeStats; import com.facebook.presto.common.predicate.Domain; import com.facebook.presto.common.predicate.TupleDomain; import com.facebook.presto.common.type.StandardTypes; import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.hive.EncryptionInformation; import com.facebook.presto.hive.FileFormatDataSourceStats; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.HiveColumnHandle; +import com.facebook.presto.hive.HiveDwrfEncryptionProvider; +import com.facebook.presto.hive.HiveOrcAggregatedMemoryContext; import com.facebook.presto.hive.filesystem.ExtendedFileSystem; +import com.facebook.presto.hive.orc.HdfsOrcDataSource; +import com.facebook.presto.hive.orc.OrcBatchPageSource; +import com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider; import com.facebook.presto.hive.parquet.ParquetPageSource; import com.facebook.presto.memory.context.AggregatedMemoryContext; +import com.facebook.presto.orc.DwrfEncryptionProvider; +import com.facebook.presto.orc.DwrfKeyProvider; +import com.facebook.presto.orc.OrcAggregatedMemoryContext; +import com.facebook.presto.orc.OrcBatchRecordReader; +import com.facebook.presto.orc.OrcDataSource; +import com.facebook.presto.orc.OrcDataSourceId; +import com.facebook.presto.orc.OrcEncoding; +import com.facebook.presto.orc.OrcPredicate; 
+import com.facebook.presto.orc.OrcReader; +import com.facebook.presto.orc.OrcReaderOptions; +import com.facebook.presto.orc.StripeMetadataSource; +import com.facebook.presto.orc.TupleDomainOrcPredicate; +import com.facebook.presto.orc.cache.OrcFileTailSource; +import com.facebook.presto.orc.metadata.OrcType; import com.facebook.presto.parquet.Field; import com.facebook.presto.parquet.ParquetCorruptionException; import com.facebook.presto.parquet.ParquetDataSource; @@ -45,6 +69,8 @@ import io.airlift.units.DataSize; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.BlockMissingException; import org.apache.iceberg.FileFormat; @@ -64,7 +90,9 @@ import java.util.Objects; import java.util.Optional; import java.util.function.Function; +import java.util.stream.IntStream; +import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; import static com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT; import static com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize; import static com.facebook.presto.hive.HiveSessionProperties.isFailOnCorruptedParquetStatistics; @@ -74,8 +102,22 @@ import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource; import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT; +import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA; +import static com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges; +import static 
com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize; +import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold; +import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled; +import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled; +import static com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY; +import static com.facebook.presto.iceberg.TypeConverter.toHiveType; import static com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; +import static com.facebook.presto.orc.OrcEncoding.ORC; +import static com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE; import static com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO; import static com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors; import static com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName; @@ -84,24 +126,42 @@ import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.Maps.uniqueIndex; import static java.lang.String.format; +import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; import static org.apache.parquet.io.ColumnIOConverter.constructField; +import static org.joda.time.DateTimeZone.UTC; public class IcebergPageSourceProvider implements ConnectorPageSourceProvider { private final HdfsEnvironment hdfsEnvironment; private final FileFormatDataSourceStats 
fileFormatDataSourceStats; + private final TypeManager typeManager; + private final OrcFileTailSource orcFileTailSource; + private final StripeMetadataSource stripeMetadataSource; + private final DwrfEncryptionProvider dwrfEncryptionProvider; + private final HiveClientConfig hiveClientConfig; @Inject public IcebergPageSourceProvider( HdfsEnvironment hdfsEnvironment, - FileFormatDataSourceStats fileFormatDataSourceStats) + FileFormatDataSourceStats fileFormatDataSourceStats, + TypeManager typeManager, + OrcFileTailSource orcFileTailSource, + StripeMetadataSource stripeMetadataSource, + HiveDwrfEncryptionProvider dwrfEncryptionProvider, + HiveClientConfig hiveClientConfig) { this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.orcFileTailSource = requireNonNull(orcFileTailSource, "orcFileTailSource is null"); + this.stripeMetadataSource = requireNonNull(stripeMetadataSource, "stripeMetadataSource is null"); + this.dwrfEncryptionProvider = requireNonNull(dwrfEncryptionProvider, "DwrfEncryptionProvider is null").toDwrfEncryptionProvider(); + this.hiveClientConfig = requireNonNull(hiveClientConfig, "hiveClientConfig is null"); } @Override @@ -139,7 +199,8 @@ public ConnectorPageSource createPageSource( split.getFileFormat(), table.getSchemaTableName(), regularColumns, - table.getPredicate()); + table.getPredicate(), + splitContext.isCacheable()); return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource, session.getSqlFunctionProperties().getTimeZoneKey()); } @@ -153,10 +214,10 @@ private ConnectorPageSource createDataPageSource( FileFormat fileFormat, SchemaTableName tableName, List dataColumns, - TupleDomain predicate) + TupleDomain predicate, + boolean isCacheable) { switch (fileFormat) { - // TODO: support ORC for iceberg case 
PARQUET: return createParquetPageSource( hdfsEnvironment, @@ -174,6 +235,46 @@ private ConnectorPageSource createDataPageSource( isParquetBatchReaderVerificationEnabled(session), predicate, fileFormatDataSourceStats); + case ORC: + FileStatus fileStatus = null; + try { + fileStatus = hdfsEnvironment.doAs(session.getUser(), () -> hdfsEnvironment.getFileSystem(hdfsContext, path).getFileStatus(path)); + } + catch (IOException e) { + throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, e); + } + long fileSize = fileStatus.getLen(); + OrcReaderOptions readerOptions = new OrcReaderOptions( + getOrcMaxMergeDistance(session), + getOrcTinyStripeThreshold(session), + getOrcMaxReadBlockSize(session), + isOrcZstdJniDecompressionEnabled(session)); + + // TODO: Implement EncryptionInformation in IcebergSplit instead of Optional.empty() + return createBatchOrcPageSource( + hdfsEnvironment, + session.getUser(), + hdfsEnvironment.getConfiguration(hdfsContext, path), + path, + start, + length, + fileSize, + isCacheable, + dataColumns, + typeManager, + predicate, + readerOptions, + ORC, + getOrcMaxBufferSize(session), + getOrcStreamBufferSize(session), + getOrcLazyReadSmallRanges(session), + isOrcBloomFiltersEnabled(session), + hiveClientConfig.getDomainCompactionThreshold(), + orcFileTailSource, + stripeMetadataSource, + fileFormatDataSourceStats, + Optional.empty(), + dwrfEncryptionProvider); } throw new PrestoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat); } @@ -312,4 +413,248 @@ private static TupleDomain getParquetTupleDomain(Map regularColumns, + TypeManager typeManager, + TupleDomain effectivePredicate, + OrcReaderOptions options, + OrcEncoding orcEncoding, + DataSize maxBufferSize, + DataSize streamBufferSize, + boolean lazyReadSmallRanges, + boolean orcBloomFiltersEnabled, + int domainCompactionThreshold, + OrcFileTailSource orcFileTailSource, + StripeMetadataSource stripeMetadataSource, + FileFormatDataSourceStats stats, + Optional 
encryptionInformation, + DwrfEncryptionProvider dwrfEncryptionProvider) + { + OrcDataSource orcDataSource = null; + try { + FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration); + FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.open(path)); + orcDataSource = new HdfsOrcDataSource( + new OrcDataSourceId(path.toString()), + fileSize, + options.getMaxMergeDistance(), + maxBufferSize, + streamBufferSize, + lazyReadSmallRanges, + inputStream, + stats); + + // Todo: pass real columns to ProjectionBasedDwrfKeyProvider instead of ImmutableList.of() + DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, ImmutableList.of(), true, path); + OrcReader reader = new OrcReader( + orcDataSource, + orcEncoding, + orcFileTailSource, + stripeMetadataSource, + new HiveOrcAggregatedMemoryContext(), + options, + isCacheable, + dwrfEncryptionProvider, + dwrfKeyProvider); + + List physicalColumnHandles = new ArrayList<>(regularColumns.size()); + ImmutableMap.Builder includedColumns = ImmutableMap.builder(); + ImmutableList.Builder> columnReferences = ImmutableList.builder(); + + List fileOrcColumns = getFileOrcColumns(reader); + + Map fileOrcColumnByIcebergId = fileOrcColumns.stream() + .filter(orcColumn -> orcColumn.getAttributes().containsKey(ORC_ICEBERG_ID_KEY)) + .collect(toImmutableMap( + orcColumn -> Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY)), + orcColumn -> IcebergOrcColumn.copy(orcColumn).setIcebergColumnId(Optional.of(Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY)))))); + + Map fileOrcColumnsByName = uniqueIndex(fileOrcColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH)); + + int nextMissingColumnIndex = fileOrcColumnsByName.size(); + for (IcebergColumnHandle column : regularColumns) { + IcebergOrcColumn icebergOrcColumn; + boolean isExcludeColumn = false; + + if (fileOrcColumnByIcebergId.isEmpty()) { + icebergOrcColumn = 
fileOrcColumnsByName.get(column.getName()); + } + else { + icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId()); + if (icebergOrcColumn == null) { + // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name. + icebergOrcColumn = fileOrcColumnsByName.get(column.getName()); + if (icebergOrcColumn != null) { + isExcludeColumn = true; + } + } + } + + if (icebergOrcColumn != null) { + HiveColumnHandle columnHandle = new HiveColumnHandle( + // Todo: using orc file column name + column.getName(), + toHiveType(column.getType()), + column.getType().getTypeSignature(), + icebergOrcColumn.getOrcColumnId(), + icebergOrcColumn.getColumnType(), + Optional.empty(), + Optional.empty()); + + physicalColumnHandles.add(columnHandle); + // Skip SchemaEvolution column + if (!isExcludeColumn) { + includedColumns.put(columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature())); + columnReferences.add(new TupleDomainOrcPredicate.ColumnReference<>(columnHandle, columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature()))); + } + } + else { + physicalColumnHandles.add(new HiveColumnHandle( + column.getName(), + toHiveType(column.getType()), + column.getType().getTypeSignature(), + nextMissingColumnIndex++, + REGULAR, + Optional.empty(), + Optional.empty())); + } + } + + TupleDomain hiveColumnHandleTupleDomain = effectivePredicate.transform(column -> { + IcebergOrcColumn icebergOrcColumn; + if (fileOrcColumnByIcebergId.isEmpty()) { + icebergOrcColumn = fileOrcColumnsByName.get(column.getName()); + } + else { + icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId()); + if (icebergOrcColumn == null) { + // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name. 
+ icebergOrcColumn = fileOrcColumnsByName.get(column.getName()); + } + } + + return new HiveColumnHandle( + column.getName(), + toHiveType(column.getType()), + column.getType().getTypeSignature(), + // Note: the HiveColumnHandle.hiveColumnIndex starts from '0' while the IcebergColumnHandle.id starts from '1' + icebergOrcColumn != null ? icebergOrcColumn.getOrcColumnId() : column.getId() - 1, + icebergOrcColumn != null ? icebergOrcColumn.getColumnType() : REGULAR, + Optional.empty(), + Optional.empty()); + }); + + OrcPredicate predicate = new TupleDomainOrcPredicate<>(hiveColumnHandleTupleDomain, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold)); + + OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext(); + OrcBatchRecordReader recordReader = reader.createBatchRecordReader( + includedColumns.build(), + predicate, + start, + length, + UTC, + systemMemoryUsage, + INITIAL_BATCH_SIZE); + + return new OrcBatchPageSource( + recordReader, + orcDataSource, + physicalColumnHandles, + typeManager, + systemMemoryUsage, + stats, + new RuntimeStats()); + } + catch (Exception e) { + if (orcDataSource != null) { + try { + orcDataSource.close(); + } + catch (IOException ignored) { + } + } + if (e instanceof PrestoException) { + throw (PrestoException) e; + } + String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage()); + if (e instanceof BlockMissingException) { + throw new PrestoException(ICEBERG_MISSING_DATA, message, e); + } + throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e); + } + } + + private static List getFileOrcColumns(OrcReader reader) + { + List orcTypes = reader.getFooter().getTypes(); + OrcType rootOrcType = orcTypes.get(ROOT_COLUMN_ID); + + List columnAttributes = ImmutableList.of(); + if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.STRUCT) { + columnAttributes = IntStream.range(0, rootOrcType.getFieldCount()) + 
.mapToObj(fieldId -> new IcebergOrcColumn( + fieldId, + rootOrcType.getFieldTypeIndex(fieldId), + // We will filter out iceberg column by 'ORC_ICEBERG_ID_KEY' later, + // so we use 'Optional.empty()' temporarily. + Optional.empty(), + rootOrcType.getFieldName(fieldId), + REGULAR, + orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(), + orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes())) + .collect(toImmutableList()); + } + else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.LIST) { + columnAttributes = ImmutableList.of( + new IcebergOrcColumn( + 0, + rootOrcType.getFieldTypeIndex(0), + Optional.empty(), + "item", + REGULAR, + orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(), + orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes())); + } + else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.MAP) { + columnAttributes = ImmutableList.of( + new IcebergOrcColumn( + 0, + rootOrcType.getFieldTypeIndex(0), + Optional.empty(), + "key", + REGULAR, + orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(), + orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes()), + new IcebergOrcColumn( + 1, + rootOrcType.getFieldTypeIndex(1), + Optional.empty(), + "value", + REGULAR, + orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getOrcTypeKind(), + orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getAttributes())); + } + else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.UNION) { + columnAttributes = IntStream.range(0, rootOrcType.getFieldCount()) + .mapToObj(fieldId -> new IcebergOrcColumn( + fieldId, + rootOrcType.getFieldTypeIndex(fieldId), + Optional.empty(), + "field" + fieldId, + REGULAR, + orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(), + orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes())) + .collect(toImmutableList()); + } + return columnAttributes; + } } diff --git 
a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java index 58230dc93ab43..f5d2bf76ae656 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java @@ -15,8 +15,11 @@ import com.facebook.presto.hive.HiveClientConfig; import com.facebook.presto.hive.HiveCompressionCodec; +import com.facebook.presto.hive.OrcFileWriterConfig; import com.facebook.presto.hive.ParquetFileWriterConfig; +import com.facebook.presto.orc.OrcWriteValidation; import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.session.PropertyMetadata; import com.google.common.collect.ImmutableList; import io.airlift.units.DataSize; @@ -24,10 +27,18 @@ import javax.inject.Inject; import java.util.List; +import java.util.concurrent.ThreadLocalRandom; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType; +import static com.facebook.presto.spi.StandardErrorCode.INVALID_SESSION_PROPERTY; import static com.facebook.presto.spi.session.PropertyMetadata.booleanProperty; +import static com.facebook.presto.spi.session.PropertyMetadata.integerProperty; +import static com.facebook.presto.spi.session.PropertyMetadata.stringProperty; +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.String.format; +import static java.util.Locale.ENGLISH; public final class IcebergSessionProperties { @@ -39,13 +50,32 @@ public final class IcebergSessionProperties private static final String PARQUET_USE_COLUMN_NAMES = "parquet_use_column_names"; private static final String PARQUET_BATCH_READ_OPTIMIZATION_ENABLED 
= "parquet_batch_read_optimization_enabled"; private static final String PARQUET_BATCH_READER_VERIFICATION_ENABLED = "parquet_batch_reader_verification_enabled"; + private static final String ORC_BLOOM_FILTERS_ENABLED = "orc_bloom_filters_enabled"; + private static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance"; + private static final String ORC_MAX_BUFFER_SIZE = "orc_max_buffer_size"; + private static final String ORC_STREAM_BUFFER_SIZE = "orc_stream_buffer_size"; + private static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold"; + private static final String ORC_MAX_READ_BLOCK_SIZE = "orc_max_read_block_size"; + private static final String ORC_LAZY_READ_SMALL_RANGES = "orc_lazy_read_small_ranges"; + private static final String ORC_ZSTD_JNI_DECOMPRESSION_ENABLED = "orc_zstd_jni_decompression_enabled"; + private static final String ORC_STRING_STATISTICS_LIMIT = "orc_string_statistics_limit"; + private static final String ORC_OPTIMIZED_WRITER_ENABLED = "orc_optimized_writer_enabled"; + private static final String ORC_OPTIMIZED_WRITER_VALIDATE = "orc_optimized_writer_validate"; + private static final String ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE = "orc_optimized_writer_validate_percentage"; + private static final String ORC_OPTIMIZED_WRITER_VALIDATE_MODE = "orc_optimized_writer_validate_mode"; + private static final String ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE = "orc_optimized_writer_min_stripe_size"; + private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE = "orc_optimized_writer_max_stripe_size"; + private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS = "orc_optimized_writer_max_stripe_rows"; + private static final String ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY = "orc_optimized_writer_max_dictionary_memory"; + private static final String ORC_COMPRESSION_CODEC = "orc_compression_codec"; private final List> sessionProperties; @Inject public IcebergSessionProperties( IcebergConfig icebergConfig, 
HiveClientConfig hiveClientConfig, - ParquetFileWriterConfig parquetFileWriterConfig) + ParquetFileWriterConfig parquetFileWriterConfig, + OrcFileWriterConfig orcFileWriterConfig) { sessionProperties = ImmutableList.of( new PropertyMetadata<>( @@ -91,7 +121,113 @@ public IcebergSessionProperties( PARQUET_WRITER_PAGE_SIZE, "Parquet: Writer page size", parquetFileWriterConfig.getPageSize(), - false)); + false), + booleanProperty( + ORC_BLOOM_FILTERS_ENABLED, + "ORC: Enable bloom filters for predicate pushdown", + hiveClientConfig.isOrcBloomFiltersEnabled(), + false), + dataSizeSessionProperty( + ORC_MAX_MERGE_DISTANCE, + "ORC: Maximum size of gap between two reads to merge into a single read", + hiveClientConfig.getOrcMaxMergeDistance(), + false), + dataSizeSessionProperty( + ORC_MAX_BUFFER_SIZE, + "ORC: Maximum size of a single read", + hiveClientConfig.getOrcMaxBufferSize(), + false), + dataSizeSessionProperty( + ORC_STREAM_BUFFER_SIZE, + "ORC: Size of buffer for streaming reads", + hiveClientConfig.getOrcStreamBufferSize(), + false), + dataSizeSessionProperty( + ORC_TINY_STRIPE_THRESHOLD, + "ORC: Threshold below which an ORC stripe or file will read in its entirety", + hiveClientConfig.getOrcTinyStripeThreshold(), + false), + dataSizeSessionProperty( + ORC_MAX_READ_BLOCK_SIZE, + "ORC: Soft max size of Presto blocks produced by ORC reader", + hiveClientConfig.getOrcMaxReadBlockSize(), + false), + booleanProperty( + ORC_LAZY_READ_SMALL_RANGES, + "Experimental: ORC: Read small file segments lazily", + hiveClientConfig.isOrcLazyReadSmallRanges(), + false), + booleanProperty( + ORC_ZSTD_JNI_DECOMPRESSION_ENABLED, + "use JNI based zstd decompression for reading ORC files", + hiveClientConfig.isZstdJniDecompressionEnabled(), + true), + dataSizeSessionProperty( + ORC_STRING_STATISTICS_LIMIT, + "ORC: Maximum size of string statistics; drop if exceeding", + orcFileWriterConfig.getStringStatisticsLimit(), + false), + booleanProperty( + ORC_OPTIMIZED_WRITER_ENABLED, + 
"Experimental: ORC: Enable optimized writer", + hiveClientConfig.isOrcOptimizedWriterEnabled(), + false), + booleanProperty( + ORC_OPTIMIZED_WRITER_VALIDATE, + "Experimental: ORC: Force all validation for files", + hiveClientConfig.getOrcWriterValidationPercentage() > 0.0, + false), + new PropertyMetadata<>( + ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, + "Experimental: ORC: sample percentage for validation for files", + DOUBLE, + Double.class, + hiveClientConfig.getOrcWriterValidationPercentage(), + false, + value -> { + double doubleValue = ((Number) value).doubleValue(); + if (doubleValue < 0.0 || doubleValue > 100.0) { + throw new PrestoException( + INVALID_SESSION_PROPERTY, + format("%s must be between 0.0 and 100.0 inclusive: %s", ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, doubleValue)); + } + return doubleValue; + }, + value -> value), + stringProperty( + ORC_OPTIMIZED_WRITER_VALIDATE_MODE, + "Experimental: ORC: Level of detail in ORC validation", + hiveClientConfig.getOrcWriterValidationMode().toString(), + false), + dataSizeSessionProperty( + ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE, + "Experimental: ORC: Min stripe size", + orcFileWriterConfig.getStripeMinSize(), + false), + dataSizeSessionProperty( + ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE, + "Experimental: ORC: Max stripe size", + orcFileWriterConfig.getStripeMaxSize(), + false), + integerProperty( + ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS, + "Experimental: ORC: Max stripe row count", + orcFileWriterConfig.getStripeMaxRowCount(), + false), + dataSizeSessionProperty( + ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY, + "Experimental: ORC: Max dictionary memory", + orcFileWriterConfig.getDictionaryMaxMemory(), + false), + new PropertyMetadata<>( + ORC_COMPRESSION_CODEC, + "The preferred compression codec to use when writing ORC and DWRF files", + VARCHAR, + HiveCompressionCodec.class, + hiveClientConfig.getOrcCompressionCodec(), + false, + value -> HiveCompressionCodec.valueOf(((String) value).toUpperCase()), + 
HiveCompressionCodec::name)); } public List> getSessionProperties() @@ -136,4 +272,101 @@ public static PropertyMetadata dataSizeSessionProperty(String name, St value -> DataSize.valueOf((String) value), DataSize::toString); } + + public static boolean isOrcBloomFiltersEnabled(ConnectorSession session) + { + return session.getProperty(ORC_BLOOM_FILTERS_ENABLED, Boolean.class); + } + + public static DataSize getOrcMaxMergeDistance(ConnectorSession session) + { + return session.getProperty(ORC_MAX_MERGE_DISTANCE, DataSize.class); + } + + public static DataSize getOrcMaxBufferSize(ConnectorSession session) + { + return session.getProperty(ORC_MAX_BUFFER_SIZE, DataSize.class); + } + + public static DataSize getOrcStreamBufferSize(ConnectorSession session) + { + return session.getProperty(ORC_STREAM_BUFFER_SIZE, DataSize.class); + } + + public static DataSize getOrcTinyStripeThreshold(ConnectorSession session) + { + return session.getProperty(ORC_TINY_STRIPE_THRESHOLD, DataSize.class); + } + + public static DataSize getOrcMaxReadBlockSize(ConnectorSession session) + { + return session.getProperty(ORC_MAX_READ_BLOCK_SIZE, DataSize.class); + } + + public static boolean getOrcLazyReadSmallRanges(ConnectorSession session) + { + return session.getProperty(ORC_LAZY_READ_SMALL_RANGES, Boolean.class); + } + + public static boolean isOrcZstdJniDecompressionEnabled(ConnectorSession session) + { + return session.getProperty(ORC_ZSTD_JNI_DECOMPRESSION_ENABLED, Boolean.class); + } + + public static DataSize getOrcStringStatisticsLimit(ConnectorSession session) + { + return session.getProperty(ORC_STRING_STATISTICS_LIMIT, DataSize.class); + } + + public static boolean isOrcOptimizedWriterEnabled(ConnectorSession session) + { + return session.getProperty(ORC_OPTIMIZED_WRITER_ENABLED, Boolean.class); + } + + public static boolean isOrcOptimizedWriterValidate(ConnectorSession session) + { + boolean validate = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE, Boolean.class); + double 
percentage = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, Double.class); + + checkArgument(percentage >= 0.0 && percentage <= 100.0); + + // session property can disable validation + if (!validate) { + return false; + } + + // session property can not force validation when sampling is enabled + // todo change this if session properties support null + return ThreadLocalRandom.current().nextDouble(100) < percentage; + } + + public static OrcWriteValidation.OrcWriteValidationMode getOrcOptimizedWriterValidateMode(ConnectorSession session) + { + return OrcWriteValidation.OrcWriteValidationMode.valueOf(session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_MODE, String.class).toUpperCase(ENGLISH)); + } + + public static DataSize getOrcOptimizedWriterMinStripeSize(ConnectorSession session) + { + return session.getProperty(ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE, DataSize.class); + } + + public static DataSize getOrcOptimizedWriterMaxStripeSize(ConnectorSession session) + { + return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE, DataSize.class); + } + + public static int getOrcOptimizedWriterMaxStripeRows(ConnectorSession session) + { + return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS, Integer.class); + } + + public static DataSize getOrcOptimizedWriterMaxDictionaryMemory(ConnectorSession session) + { + return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY, DataSize.class); + } + + public static HiveCompressionCodec getOrcCompressionCodec(ConnectorSession session) + { + return session.getProperty(ORC_COMPRESSION_CODEC, HiveCompressionCodec.class); + } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java index 633cdca03cd80..6dc2499066ca0 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java @@ -13,6
+13,7 @@ */ package com.facebook.presto.iceberg; +import com.facebook.presto.common.predicate.TupleDomain; import com.facebook.presto.common.type.TypeManager; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; @@ -30,6 +31,8 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; +import org.apache.iceberg.TableScan; +import org.apache.iceberg.expressions.Expression; import java.util.List; import java.util.Locale; @@ -42,6 +45,7 @@ import static com.facebook.presto.iceberg.TypeConverter.toPrestoType; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Lists.reverse; +import static com.google.common.collect.Streams.stream; import static java.lang.String.format; import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE; import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; @@ -141,4 +145,19 @@ private static String quotedName(String name) } return '"' + name.replace("\"", "\"\"") + '"'; } + + public static TableScan getTableScan(TupleDomain predicates, Optional snapshotId, Table icebergTable) + { + Expression expression = ExpressionConverter.toIcebergExpression(predicates); + TableScan tableScan = icebergTable.newScan().filter(expression); + return snapshotId + .map(id -> isSnapshot(icebergTable, id) ? 
tableScan.useSnapshot(id) : tableScan.asOfTime(id)) + .orElse(tableScan); + } + + private static boolean isSnapshot(Table icebergTable, Long id) + { + return stream(icebergTable.snapshots()) + .anyMatch(snapshot -> snapshot.snapshotId() == id); + } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java index b850cbd996118..725e22ac2c9e0 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java @@ -17,8 +17,8 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.event.client.EventModule; import com.facebook.airlift.json.JsonModule; -import com.facebook.presto.client.NodeVersion; import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.hive.NodeVersion; import com.facebook.presto.hive.authentication.HiveAuthenticationModule; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; import com.facebook.presto.hive.metastore.HiveMetastoreModule; @@ -60,7 +60,7 @@ public static Connector createConnector(String catalogName, Map new EventModule(), new MBeanModule(), new JsonModule(), - new IcebergModule(), + new IcebergModule(catalogName), new IcebergMetastoreModule(), new HiveS3Module(catalogName), new HiveAuthenticationModule(), diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java index ad6aabf3370f9..295b835607ee2 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java @@ -14,7 +14,10 @@ package com.facebook.presto.iceberg; import com.facebook.presto.common.Page; +import 
com.facebook.presto.common.block.BlockBuilder; import com.facebook.presto.common.predicate.TupleDomain; +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.common.type.RowType; import com.facebook.presto.iceberg.util.PageListBuilder; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorPageSource; @@ -26,19 +29,25 @@ import com.facebook.presto.spi.SystemTable; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; import com.google.common.collect.ImmutableList; +import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.PartitionField; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; import java.util.List; import java.util.Map; import java.util.Optional; import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.IntegerType.INTEGER; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; import static java.lang.String.format; +import static java.util.Arrays.asList; import static java.util.Objects.requireNonNull; public class ManifestsTable @@ -62,6 +71,10 @@ public ManifestsTable(SchemaTableName tableName, Table icebergTable, Optional buildPages(ConnectorTableMetadata tableMetadata, Table pagesBuilder.appendInteger(file.addedFilesCount()); pagesBuilder.appendInteger(file.existingFilesCount()); pagesBuilder.appendInteger(file.deletedFilesCount()); + writePartitionSummaries(pagesBuilder.nextColumn(), file.partitions(), partitionSpecsById.get(file.partitionSpecId())); pagesBuilder.endRow(); }); return pagesBuilder.build(); } + + private static void writePartitionSummaries(BlockBuilder arrayBlockBuilder, List summaries, 
PartitionSpec partitionSpec) + { + BlockBuilder singleArrayWriter = arrayBlockBuilder.beginBlockEntry(); + for (int i = 0; i < summaries.size(); i++) { + ManifestFile.PartitionFieldSummary summary = summaries.get(i); + PartitionField field = partitionSpec.fields().get(i); + Type nestedType = partitionSpec.partitionType().fields().get(i).type(); + + BlockBuilder rowBuilder = singleArrayWriter.beginBlockEntry(); + BOOLEAN.writeBoolean(rowBuilder, summary.containsNull()); + VARCHAR.writeString(rowBuilder, field.transform().toHumanString( + Conversions.fromByteBuffer(nestedType, summary.lowerBound()))); + VARCHAR.writeString(rowBuilder, field.transform().toHumanString( + Conversions.fromByteBuffer(nestedType, summary.upperBound()))); + singleArrayWriter.closeEntry(); + } + arrayBlockBuilder.closeEntry(); + } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java index 322ea0e690bec..6e10dd6b2cf8d 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java @@ -15,7 +15,9 @@ import com.facebook.presto.common.Page; import com.facebook.presto.common.predicate.TupleDomain; +import com.facebook.presto.common.type.StandardTypes; import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.common.type.TypeSignatureParameter; import com.facebook.presto.iceberg.util.PageListBuilder; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorPageSource; @@ -53,6 +55,9 @@ public SnapshotsTable(SchemaTableName tableName, TypeManager typeManager, Table .add(new ColumnMetadata("parent_id", BIGINT)) .add(new ColumnMetadata("operation", VARCHAR)) .add(new ColumnMetadata("manifest_list", VARCHAR)) + .add(new ColumnMetadata("summary", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of( + 
TypeSignatureParameter.of(VARCHAR.getTypeSignature()), + TypeSignatureParameter.of(VARCHAR.getTypeSignature()))))) .build()); } @@ -91,6 +96,9 @@ private static List buildPages(ConnectorTableMetadata tableMetadata, Conne if (checkNonNull(snapshot.manifestListLocation(), pagesBuilder)) { pagesBuilder.appendVarchar(snapshot.manifestListLocation()); } + if (checkNonNull(snapshot.summary(), pagesBuilder)) { + pagesBuilder.appendVarcharVarcharMap(snapshot.summary()); + } pagesBuilder.endRow(); }); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java index c00c5b86de33b..8058b224f49b4 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java @@ -16,11 +16,13 @@ import com.facebook.presto.common.type.ArrayType; import com.facebook.presto.common.type.BigintType; import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.CharType; import com.facebook.presto.common.type.DateType; import com.facebook.presto.common.type.DecimalType; import com.facebook.presto.common.type.DoubleType; import com.facebook.presto.common.type.IntegerType; import com.facebook.presto.common.type.MapType; +import com.facebook.presto.common.type.NamedTypeSignature; import com.facebook.presto.common.type.RealType; import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.StandardTypes; @@ -33,17 +35,57 @@ import com.facebook.presto.common.type.TypeSignatureParameter; import com.facebook.presto.common.type.VarbinaryType; import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.hive.HiveType; +import com.facebook.presto.orc.metadata.OrcType; import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import 
org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.iceberg.Schema; import org.apache.iceberg.types.Types; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DateType.DATE; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.RealType.REAL; +import static com.facebook.presto.common.type.SmallintType.SMALLINT; +import static com.facebook.presto.common.type.TimestampType.TIMESTAMP; +import static com.facebook.presto.common.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE; +import static com.facebook.presto.common.type.TinyintType.TINYINT; +import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.hive.HiveType.HIVE_BINARY; +import static com.facebook.presto.hive.HiveType.HIVE_BOOLEAN; +import static com.facebook.presto.hive.HiveType.HIVE_BYTE; +import static com.facebook.presto.hive.HiveType.HIVE_DATE; +import static com.facebook.presto.hive.HiveType.HIVE_DOUBLE; +import static com.facebook.presto.hive.HiveType.HIVE_FLOAT; +import static com.facebook.presto.hive.HiveType.HIVE_INT; +import static com.facebook.presto.hive.HiveType.HIVE_LONG; +import static com.facebook.presto.hive.HiveType.HIVE_SHORT; +import static com.facebook.presto.hive.HiveType.HIVE_STRING; +import static com.facebook.presto.hive.HiveType.HIVE_TIMESTAMP; +import static com.facebook.presto.hive.metastore.MetastoreUtil.isArrayType; +import static com.facebook.presto.hive.metastore.MetastoreUtil.isMapType; 
+import static com.facebook.presto.hive.metastore.MetastoreUtil.isRowType; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; +import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo; +import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getListTypeInfo; +import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getMapTypeInfo; +import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo; +import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo; public final class TypeConverter { @@ -144,6 +186,11 @@ public static org.apache.iceberg.types.Type toIcebergType(Type type) throw new PrestoException(NOT_SUPPORTED, "Type not supported for Iceberg: " + type.getDisplayName()); } + public static HiveType toHiveType(Type type) + { + return HiveType.toHiveType(toHiveTypeInfo(type)); + } + private static org.apache.iceberg.types.Type fromDecimal(DecimalType type) { return Types.DecimalType.of(type.getPrecision(), type.getScale()); @@ -169,4 +216,223 @@ private static org.apache.iceberg.types.Type fromMap(MapType type) { return Types.MapType.ofOptional(1, 2, toIcebergType(type.getKeyType()), toIcebergType(type.getValueType())); } + + private static TypeInfo toHiveTypeInfo(Type type) + { + if (BOOLEAN.equals(type)) { + return HIVE_BOOLEAN.getTypeInfo(); + } + if (BIGINT.equals(type)) { + return HIVE_LONG.getTypeInfo(); + } + if (INTEGER.equals(type)) { + return HIVE_INT.getTypeInfo(); + } + if (SMALLINT.equals(type)) { + return HIVE_SHORT.getTypeInfo(); + } + if (TINYINT.equals(type)) { + return HIVE_BYTE.getTypeInfo(); + } + if (REAL.equals(type)) { + return HIVE_FLOAT.getTypeInfo(); + } + if (DOUBLE.equals(type)) { + return HIVE_DOUBLE.getTypeInfo(); + } + if (type instanceof 
VarcharType) { + VarcharType varcharType = (VarcharType) type; + if (varcharType.isUnbounded()) { + return HIVE_STRING.getTypeInfo(); + } + if (varcharType.getLengthSafe() <= HiveVarchar.MAX_VARCHAR_LENGTH) { + return getVarcharTypeInfo(varcharType.getLengthSafe()); + } + throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s. Supported VARCHAR types: VARCHAR(<=%d), VARCHAR.", type, HiveVarchar.MAX_VARCHAR_LENGTH)); + } + if (type instanceof CharType) { + CharType charType = (CharType) type; + int charLength = charType.getLength(); + if (charLength <= HiveChar.MAX_CHAR_LENGTH) { + return getCharTypeInfo(charLength); + } + throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s. Supported CHAR types: CHAR(<=%d).", + type, HiveChar.MAX_CHAR_LENGTH)); + } + if (VARBINARY.equals(type)) { + return HIVE_BINARY.getTypeInfo(); + } + if (DATE.equals(type)) { + return HIVE_DATE.getTypeInfo(); + } + if (TIMESTAMP.equals(type)) { + return HIVE_TIMESTAMP.getTypeInfo(); + } + if (TIMESTAMP_WITH_TIME_ZONE.equals(type)) { + // Hive does not have TIMESTAMP_WITH_TIME_ZONE, this is just a work around for iceberg. 
+ return HIVE_TIMESTAMP.getTypeInfo(); + } + if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType) type; + return new DecimalTypeInfo(decimalType.getPrecision(), decimalType.getScale()); + } + if (isArrayType(type)) { + TypeInfo elementType = toHiveTypeInfo(type.getTypeParameters().get(0)); + return getListTypeInfo(elementType); + } + if (isMapType(type)) { + TypeInfo keyType = toHiveTypeInfo(type.getTypeParameters().get(0)); + TypeInfo valueType = toHiveTypeInfo(type.getTypeParameters().get(1)); + return getMapTypeInfo(keyType, valueType); + } + if (isRowType(type)) { + ImmutableList.Builder fieldNames = ImmutableList.builder(); + for (TypeSignatureParameter parameter : type.getTypeSignature().getParameters()) { + if (!parameter.isNamedTypeSignature()) { + throw new IllegalArgumentException(format("Expected all parameters to be named type, but got %s", parameter)); + } + NamedTypeSignature namedTypeSignature = parameter.getNamedTypeSignature(); + if (!namedTypeSignature.getName().isPresent()) { + throw new PrestoException(NOT_SUPPORTED, format("Anonymous row type is not supported in Hive. 
Please give each field a name: %s", type)); + } + fieldNames.add(namedTypeSignature.getName().get()); + } + return getStructTypeInfo( + fieldNames.build(), + type.getTypeParameters().stream() + .map(TypeConverter::toHiveTypeInfo) + .collect(toList())); + } + throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", type)); + } + + public static List toOrcType(Schema schema) + { + return toOrcStructType(0, schema.asStruct(), ImmutableMap.of()); + } + + private static List toOrcType(int nextFieldTypeIndex, org.apache.iceberg.types.Type type, Map attributes) + { + switch (type.typeId()) { + case BOOLEAN: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BOOLEAN, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case INTEGER: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.INT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case LONG: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.LONG, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case FLOAT: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.FLOAT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case DOUBLE: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DOUBLE, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case DATE: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DATE, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case TIME: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.INT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case TIMESTAMP: + return ImmutableList.of(new 
OrcType(OrcType.OrcTypeKind.TIMESTAMP, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case STRING: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.STRING, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case UUID: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case FIXED: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case BINARY: + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes)); + case DECIMAL: + Types.DecimalType decimalType = (Types.DecimalType) type; + return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DECIMAL, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.of(decimalType.precision()), Optional.of(decimalType.scale()), attributes)); + case STRUCT: + return toOrcStructType(nextFieldTypeIndex, (Types.StructType) type, attributes); + case LIST: + return toOrcListType(nextFieldTypeIndex, (Types.ListType) type, attributes); + case MAP: + return toOrcMapType(nextFieldTypeIndex, (Types.MapType) type, attributes); + default: + throw new PrestoException(NOT_SUPPORTED, "Unsupported Iceberg type: " + type); + } + } + + private static List toOrcStructType(int nextFieldTypeIndex, Types.StructType structType, Map attributes) + { + nextFieldTypeIndex++; + List fieldTypeIndexes = new ArrayList<>(); + List fieldNames = new ArrayList<>(); + List> fieldTypesList = new ArrayList<>(); + for (Types.NestedField field : structType.fields()) { + fieldTypeIndexes.add(nextFieldTypeIndex); + fieldNames.add(field.name()); + Map fieldAttributes = 
ImmutableMap.builder() + .put(ORC_ICEBERG_ID_KEY, Integer.toString(field.fieldId())) + .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(field.isRequired())) + .build(); + List fieldOrcTypes = toOrcType(nextFieldTypeIndex, field.type(), fieldAttributes); + fieldTypesList.add(fieldOrcTypes); + nextFieldTypeIndex += fieldOrcTypes.size(); + } + + ImmutableList.Builder orcTypes = ImmutableList.builder(); + orcTypes.add(new OrcType( + OrcType.OrcTypeKind.STRUCT, + fieldTypeIndexes, + fieldNames, + Optional.empty(), + Optional.empty(), + Optional.empty(), + attributes)); + fieldTypesList.forEach(orcTypes::addAll); + + return orcTypes.build(); + } + + private static List toOrcListType(int nextFieldTypeIndex, Types.ListType listType, Map attributes) + { + nextFieldTypeIndex++; + Map elementAttributes = ImmutableMap.builder() + .put(ORC_ICEBERG_ID_KEY, Integer.toString(listType.elementId())) + .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(listType.isElementRequired())) + .build(); + List itemTypes = toOrcType(nextFieldTypeIndex, listType.elementType(), elementAttributes); + + List orcTypes = new ArrayList<>(); + orcTypes.add(new OrcType( + OrcType.OrcTypeKind.LIST, + ImmutableList.of(nextFieldTypeIndex), + ImmutableList.of("item"), + Optional.empty(), + Optional.empty(), + Optional.empty(), + attributes)); + + orcTypes.addAll(itemTypes); + return orcTypes; + } + + private static List toOrcMapType(int nextFieldTypeIndex, Types.MapType mapType, Map attributes) + { + nextFieldTypeIndex++; + Map keyAttributes = ImmutableMap.builder() + .put(ORC_ICEBERG_ID_KEY, Integer.toString(mapType.keyId())) + .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(true)) + .build(); + List keyTypes = toOrcType(nextFieldTypeIndex, mapType.keyType(), keyAttributes); + Map valueAttributes = ImmutableMap.builder() + .put(ORC_ICEBERG_ID_KEY, Integer.toString(mapType.valueId())) + .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(mapType.isValueRequired())) + .build(); + List valueTypes = 
toOrcType(nextFieldTypeIndex + keyTypes.size(), mapType.valueType(), valueAttributes); + + List orcTypes = new ArrayList<>(); + orcTypes.add(new OrcType( + OrcType.OrcTypeKind.MAP, + ImmutableList.of(nextFieldTypeIndex, nextFieldTypeIndex + keyTypes.size()), + ImmutableList.of("key", "value"), + Optional.empty(), + Optional.empty(), + Optional.empty(), + attributes)); + + orcTypes.addAll(keyTypes); + orcTypes.addAll(valueTypes); + return orcTypes; + } } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java new file mode 100644 index 0000000000000..9b51febe10b07 --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java @@ -0,0 +1,120 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */
package com.facebook.presto.iceberg;

import com.facebook.presto.Session;
import com.facebook.presto.hive.HivePlugin;
import com.facebook.presto.spi.security.Identity;
import com.facebook.presto.spi.security.SelectedRole;
import com.facebook.presto.testing.QueryRunner;
import com.facebook.presto.tests.AbstractTestQueryFramework;
import com.facebook.presto.tests.DistributedQueryRunner;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.nio.file.Path;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
import static com.facebook.presto.spi.security.SelectedRole.Type.ROLE;
import static com.facebook.presto.testing.TestingSession.testSessionBuilder;

/**
 * Verifies metadata listing (SHOW TABLES, information_schema.columns, DESCRIBE, direct reads)
 * for the Iceberg connector when Iceberg and plain Hive tables coexist in the same
 * file-based metastore directory.
 */
public class TestIcebergMetadataListing
        extends AbstractTestQueryFramework
{
    @Override
    protected QueryRunner createQueryRunner()
            throws Exception
    {
        // Run as the "hive" user with the "admin" role selected so DDL is permitted
        // under the sql-standard security mode configured for the Hive catalog below.
        Session session = testSessionBuilder()
                .setIdentity(new Identity(
                        "hive",
                        Optional.empty(),
                        ImmutableMap.of("hive", new SelectedRole(ROLE, Optional.of("admin"))),
                        ImmutableMap.of(),
                        ImmutableMap.of()))
                .build();
        DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build();

        Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");

        queryRunner.installPlugin(new IcebergPlugin());
        Map icebergProperties = ImmutableMap.builder()
                .put("hive.metastore", "file")
                .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
                .build();

        queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);

        // The Hive catalog deliberately points at the SAME file metastore directory as the
        // Iceberg catalog, so each connector can see tables created through the other.
        queryRunner.installPlugin(new HivePlugin("hive"));
        Map hiveProperties = ImmutableMap.builder()
                .put("hive.metastore", "file")
                .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
                .put("hive.security", "sql-standard")
                .build();

        queryRunner.createCatalog("hive", "hive", hiveProperties);

        return queryRunner;
    }

    // Fixture: one shared schema holding two Iceberg tables and one Hive table.
    @BeforeClass
    public void setUp()
    {
        assertQuerySucceeds("CREATE SCHEMA hive.test_schema");
        assertQuerySucceeds("CREATE TABLE iceberg.test_schema.iceberg_table1 (_string VARCHAR, _integer INTEGER)");
        assertQuerySucceeds("CREATE TABLE iceberg.test_schema.iceberg_table2 (_double DOUBLE) WITH (partitioning = ARRAY['_double'])");
        assertQuerySucceeds("CREATE TABLE hive.test_schema.hive_table (_double DOUBLE)");
    }

    // Cleanup in reverse creation order; IF EXISTS keeps teardown robust even if setUp failed midway.
    @AfterClass(alwaysRun = true)
    public void tearDown()
    {
        assertQuerySucceeds("DROP TABLE IF EXISTS hive.test_schema.hive_table");
        assertQuerySucceeds("DROP TABLE IF EXISTS iceberg.test_schema.iceberg_table2");
        assertQuerySucceeds("DROP TABLE IF EXISTS iceberg.test_schema.iceberg_table1");
        assertQuerySucceeds("DROP SCHEMA IF EXISTS hive.test_schema");
    }

    @Test
    public void testTableListing()
    {
        // For now, the Iceberg connector shows all tables (Iceberg and non-Iceberg) under a schema.
        assertQuery("SHOW TABLES FROM iceberg.test_schema", "VALUES 'iceberg_table1', 'iceberg_table2', 'hive_table'");
    }

    @Test
    public void testTableColumnListing()
    {
        // Verify information_schema.columns does not include columns from non-Iceberg tables
        assertQuery("SELECT table_name, column_name FROM iceberg.information_schema.columns WHERE table_schema = 'test_schema'",
                "VALUES ('iceberg_table1', '_string'), ('iceberg_table1', '_integer'), ('iceberg_table2', '_double')");
    }

    @Test
    public void testTableDescribing()
    {
        assertQuery("DESCRIBE iceberg.test_schema.iceberg_table1", "VALUES ('_string', 'varchar', '', ''), ('_integer', 'integer', '', '')");
    }

    @Test
    public void testTableValidation()
    {
        // Reading a non-Iceberg table through the Iceberg connector must fail with an explicit error.
        assertQuerySucceeds("SELECT * FROM iceberg.test_schema.iceberg_table1");
        assertQueryFails("SELECT * FROM iceberg.test_schema.hive_table", "Not an Iceberg table: test_schema.hive_table");
    }
}
 */
package com.facebook.presto.iceberg;

import com.facebook.presto.Session;
import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.MaterializedRow;
import com.facebook.presto.testing.QueryRunner;
import com.facebook.presto.tests.AbstractTestQueryFramework;
import com.facebook.presto.tests.DistributedQueryRunner;
import com.facebook.presto.tpch.TpchPlugin;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;

import java.nio.file.Path;
import java.util.Map;

import static com.facebook.presto.SystemSessionProperties.MAX_DRIVERS_PER_TASK;
import static com.facebook.presto.SystemSessionProperties.TASK_CONCURRENCY;
import static com.facebook.presto.SystemSessionProperties.TASK_WRITER_COUNT;
import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
import static com.facebook.presto.iceberg.TestIcebergOrcMetricsCollection.DataFileRecord.toDataFileRecord;
import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;

/**
 * Verifies the per-data-file metrics (record counts, value/null counts, lower/upper bounds)
 * that Iceberg collects when writing ORC files, as exposed through the "$files" metadata table.
 */
public class TestIcebergOrcMetricsCollection
        extends AbstractTestQueryFramework
{
    @Override
    protected QueryRunner createQueryRunner()
            throws Exception
    {
        // Single node, single writer, single driver so each CTAS/INSERT produces exactly
        // one data file — the tests below assert on a single row of "$files".
        Session session = testSessionBuilder()
                .setCatalog(ICEBERG_CATALOG)
                .setSchema("test_schema")
                .setSystemProperty(TASK_CONCURRENCY, "1")
                .setSystemProperty(TASK_WRITER_COUNT, "1")
                .setSystemProperty(MAX_DRIVERS_PER_TASK, "1")
                // Effectively unlimited string statistics so string bounds are never truncated.
                .setCatalogSessionProperty(ICEBERG_CATALOG, "orc_string_statistics_limit", Integer.MAX_VALUE + "B")
                .build();
        DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session)
                .setNodeCount(1)
                .build();

        queryRunner.installPlugin(new TpchPlugin());
        queryRunner.createCatalog("tpch", "tpch");

        Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");

        queryRunner.installPlugin(new IcebergPlugin());
        Map icebergProperties = ImmutableMap.builder()
                .put("hive.metastore", "file")
                .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
                .build();

        queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);

        queryRunner.execute("CREATE SCHEMA test_schema");

        return queryRunner;
    }

    @Test
    public void testBasic()
    {
        assertUpdate("CREATE TABLE orders WITH (format = 'ORC') AS SELECT * FROM tpch.tiny.orders", 15000);
        MaterializedResult materializedResult = computeActual("SELECT * FROM \"orders$files\"");
        assertEquals(materializedResult.getRowCount(), 1);
        DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));

        // Check file format
        assertEquals(datafile.getFileFormat(), "ORC");

        // Check file row count
        assertEquals(datafile.getRecordCount(), 15000L);

        // Check per-column value count
        datafile.getValueCounts().values().forEach(valueCount -> assertEquals(valueCount, (Long) 15000L));

        // Check per-column null value count
        datafile.getNullValueCounts().values().forEach(nullValueCount -> assertEquals(nullValueCount, (Long) 0L));

        // Check per-column lower bound; map keys are the Iceberg field IDs (1-based column order)
        Map lowerBounds = datafile.getLowerBounds();
        assertQuery("SELECT min(orderkey) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(1));
        assertQuery("SELECT min(custkey) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(2));
        assertQuery("SELECT min(orderstatus) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(3) + "'");
        assertQuery("SELECT min(totalprice) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(4));
        assertQuery("SELECT min(orderdate) FROM tpch.tiny.orders", "VALUES DATE '" + lowerBounds.get(5) + "'");
        assertQuery("SELECT min(orderpriority) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(6) + "'");
        assertQuery("SELECT min(clerk) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(7) + "'");
        assertQuery("SELECT min(shippriority) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(8));
        assertQuery("SELECT min(comment) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(9) + "'");

        // Check per-column upper bound
        Map upperBounds = datafile.getUpperBounds();
        assertQuery("SELECT max(orderkey) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(1));
        assertQuery("SELECT max(custkey) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(2));
        assertQuery("SELECT max(orderstatus) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(3) + "'");
        assertQuery("SELECT max(totalprice) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(4));
        assertQuery("SELECT max(orderdate) FROM tpch.tiny.orders", "VALUES DATE '" + upperBounds.get(5) + "'");
        assertQuery("SELECT max(orderpriority) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(6) + "'");
        assertQuery("SELECT max(clerk) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(7) + "'");
        assertQuery("SELECT max(shippriority) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(8));
        assertQuery("SELECT max(comment) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(9) + "'");

        assertUpdate("DROP TABLE orders");
    }

    @Test
    public void testWithNulls()
    {
        assertUpdate("CREATE TABLE test_with_nulls (_integer INTEGER, _real REAL, _string VARCHAR) WITH (format = 'ORC')");
        assertUpdate("INSERT INTO test_with_nulls VALUES (7, 3.4, 'aaa'), (3, 4.5, 'bbb'), (4, null, 'ccc'), (null, null, 'ddd')", 4);
        MaterializedResult materializedResult = computeActual("SELECT * FROM \"test_with_nulls$files\"");
        assertEquals(materializedResult.getRowCount(), 1);
        DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));

        // Check per-column value count (value counts include nulls)
        datafile.getValueCounts().values().forEach(valueCount -> assertEquals(valueCount, (Long) 4L));

        // Check per-column null value count
        assertEquals(datafile.getNullValueCounts().get(1), (Long) 1L);
        assertEquals(datafile.getNullValueCounts().get(2), (Long) 2L);
        assertEquals(datafile.getNullValueCounts().get(3), (Long) 0L);

        // Check per-column lower bound (nulls are ignored when computing bounds)
        assertEquals(datafile.getLowerBounds().get(1), "3");
        assertEquals(datafile.getLowerBounds().get(2), "3.4");
        assertEquals(datafile.getLowerBounds().get(3), "aaa");

        assertUpdate("DROP TABLE test_with_nulls");

        assertUpdate("CREATE TABLE test_all_nulls (_integer INTEGER) WITH (format = 'ORC')");
        assertUpdate("INSERT INTO test_all_nulls VALUES null, null, null", 3);
        materializedResult = computeActual("SELECT * FROM \"test_all_nulls$files\"");
        assertEquals(materializedResult.getRowCount(), 1);
        datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));

        // Check per-column value count
        assertEquals(datafile.getValueCounts().get(1), (Long) 3L);

        // Check per-column null value count
        assertEquals(datafile.getNullValueCounts().get(1), (Long) 3L);

        // Check that lower bounds and upper bounds are nulls. (There's no non-null record)
        assertNull(datafile.getLowerBounds());
        assertNull(datafile.getUpperBounds());

        assertUpdate("DROP TABLE test_all_nulls");
    }

    @Test
    public void testNestedTypes()
    {
        assertUpdate("CREATE TABLE test_nested_types (col1 INTEGER, col2 ROW (f1 INTEGER, f2 ARRAY(INTEGER), f3 DOUBLE)) WITH (format = 'ORC')");
        assertUpdate("INSERT INTO test_nested_types VALUES " +
                "(7, ROW(3, ARRAY[10, 11, 19], 1.9)), " +
                "(-9, ROW(4, ARRAY[13, 16, 20], -2.9)), " +
                "(8, ROW(0, ARRAY[14, 17, 21], 3.9)), " +
                "(3, ROW(10, ARRAY[15, 18, 22], 4.9))", 4);
        MaterializedResult materializedResult = computeActual("SELECT * FROM \"test_nested_types$files\"");
        assertEquals(materializedResult.getRowCount(), 1);
        DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));

        Map lowerBounds = datafile.getLowerBounds();
        Map upperBounds = datafile.getUpperBounds();

        // Only
        // 1. top-level primitive columns
        // 2. and nested primitive fields that are not descendants of LISTs or MAPs
        // should appear in lowerBounds or upperBounds
        assertEquals(lowerBounds.size(), 3);
        assertEquals(upperBounds.size(), 3);

        // col1
        assertEquals(lowerBounds.get(1), "-9");
        assertEquals(upperBounds.get(1), "8");

        // col2.f1 (key in lowerBounds/upperBounds is Iceberg ID)
        assertEquals(lowerBounds.get(3), "0");
        assertEquals(upperBounds.get(3), "10");

        // col2.f3 (key in lowerBounds/upperBounds is Iceberg ID)
        assertEquals(lowerBounds.get(5), "-2.9");
        assertEquals(upperBounds.get(5), "4.9");

        assertUpdate("DROP TABLE test_nested_types");
    }

    /**
     * Typed view over one row of the Iceberg "$files" metadata table.
     * Map-valued metrics are keyed by Iceberg field ID and may be null when
     * the writer recorded no statistics for that metric.
     */
    public static class DataFileRecord
    {
        private final String filePath;
        private final String fileFormat;
        private final long recordCount;
        private final long fileSizeInBytes;
        private final Map columnSizes;
        private final Map valueCounts;
        private final Map nullValueCounts;
        private final Map lowerBounds;
        private final Map upperBounds;

        // Converts a raw "$files" row (expected to have exactly 11 fields) into a DataFileRecord;
        // positional fields 0-8 are consumed here, trailing fields are ignored.
        public static DataFileRecord toDataFileRecord(MaterializedRow row)
        {
            assertEquals(row.getFieldCount(), 11);
            return new DataFileRecord(
                    (String) row.getField(0),
                    (String) row.getField(1),
                    (long) row.getField(2),
                    (long) row.getField(3),
                    row.getField(4) != null ? ImmutableMap.copyOf((Map) row.getField(4)) : null,
                    row.getField(5) != null ? ImmutableMap.copyOf((Map) row.getField(5)) : null,
                    row.getField(6) != null ? ImmutableMap.copyOf((Map) row.getField(6)) : null,
                    row.getField(7) != null ? ImmutableMap.copyOf((Map) row.getField(7)) : null,
                    row.getField(8) != null ? ImmutableMap.copyOf((Map) row.getField(8)) : null);
        }

        private DataFileRecord(
                String filePath,
                String fileFormat,
                long recordCount,
                long fileSizeInBytes,
                Map columnSizes,
                Map valueCounts,
                Map nullValueCounts,
                Map lowerBounds,
                Map upperBounds)
        {
            this.filePath = filePath;
            this.fileFormat = fileFormat;
            this.recordCount = recordCount;
            this.fileSizeInBytes = fileSizeInBytes;
            this.columnSizes = columnSizes;
            this.valueCounts = valueCounts;
            this.nullValueCounts = nullValueCounts;
            this.lowerBounds = lowerBounds;
            this.upperBounds = upperBounds;
        }

        public String getFilePath()
        {
            return filePath;
        }

        public String getFileFormat()
        {
            return fileFormat;
        }

        public long getRecordCount()
        {
            return recordCount;
        }

        public long getFileSizeInBytes()
        {
            return fileSizeInBytes;
        }

        public Map getColumnSizes()
        {
            return columnSizes;
        }

        public Map getValueCounts()
        {
            return valueCounts;
        }

        public Map getNullValueCounts()
        {
            return nullValueCounts;
        }

        public Map getLowerBounds()
        {
            return lowerBounds;
        }

        public Map getUpperBounds()
        {
            return upperBounds;
        }
    }
}
org.intellij.lang.annotations.Language; import org.testng.annotations.Test; +import java.util.function.BiConsumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.iceberg.IcebergQueryRunner.createIcebergQueryRunner; +import static com.facebook.presto.testing.MaterializedResult.resultBuilder; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.Iterables.getOnlyElement; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; public class TestIcebergSmoke extends AbstractTestIntegrationSmokeTest { + private static final Pattern WITH_CLAUSE_EXTRACTER = Pattern.compile(".*(WITH\\s*\\([^)]*\\))\\s*$", Pattern.DOTALL); + @Override protected QueryRunner createQueryRunner() throws Exception @@ -30,33 +49,633 @@ protected QueryRunner createQueryRunner() return createIcebergQueryRunner(ImmutableMap.of()); } + @Test + public void testTimestamp() + { + // TODO + } + + @Test @Override - protected boolean isParameterizedVarcharSupported() + public void testDescribeTable() + { + MaterializedResult expectedColumns = resultBuilder(getQueryRunner().getDefaultSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("orderkey", "bigint", "", "") + .row("custkey", "bigint", "", "") + .row("orderstatus", "varchar", "", "") + .row("totalprice", "double", "", "") + .row("orderdate", "date", "", "") + .row("orderpriority", "varchar", "", "") + .row("clerk", "varchar", "", "") + .row("shippriority", "integer", "", "") + .row("comment", "varchar", "", "") + .build(); + MaterializedResult actualColumns = computeActual("DESCRIBE orders"); + Assert.assertEquals(actualColumns, expectedColumns); + } + + public void testShowCreateTable() { - return false; + assertThat(computeActual("SHOW 
CREATE TABLE orders").getOnlyValue()) + .isEqualTo("CREATE TABLE iceberg.tpch.orders (\n" + + " orderkey bigint,\n" + + " custkey bigint,\n" + + " orderstatus varchar,\n" + + " totalprice double,\n" + + " orderdate date,\n" + + " orderpriority varchar,\n" + + " clerk varchar,\n" + + " shippriority integer,\n" + + " comment varchar\n" + + ")\n" + + "WITH (\n" + + " format = 'ORC'\n" + + ")"); } @Test public void testDecimal() + { + testWithAllFileFormats((session, format) -> testDecimalForFormat(session, format)); + } + + private void testDecimalForFormat(Session session, FileFormat format) + { + testDecimalWithPrecisionAndScale(session, format, 1, 0); + testDecimalWithPrecisionAndScale(session, format, 8, 6); + testDecimalWithPrecisionAndScale(session, format, 9, 8); + testDecimalWithPrecisionAndScale(session, format, 10, 8); + + testDecimalWithPrecisionAndScale(session, format, 18, 1); + testDecimalWithPrecisionAndScale(session, format, 18, 8); + testDecimalWithPrecisionAndScale(session, format, 18, 17); + + testDecimalWithPrecisionAndScale(session, format, 17, 16); + testDecimalWithPrecisionAndScale(session, format, 18, 17); + testDecimalWithPrecisionAndScale(session, format, 24, 10); + testDecimalWithPrecisionAndScale(session, format, 30, 10); + testDecimalWithPrecisionAndScale(session, format, 37, 26); + testDecimalWithPrecisionAndScale(session, format, 38, 37); + + testDecimalWithPrecisionAndScale(session, format, 38, 17); + testDecimalWithPrecisionAndScale(session, format, 38, 37); + } + + private void testDecimalWithPrecisionAndScale(Session session, FileFormat format, int precision, int scale) + { + checkArgument(precision >= 1 && precision <= 38, "Decimal precision (%s) must be between 1 and 38 inclusive", precision); + checkArgument(scale < precision && scale >= 0, "Decimal scale (%s) must be less than the precision (%s) and non-negative", scale, precision); + + String tableName = format("test_decimal_p%d_s%d", precision, scale); + String decimalType = 
format("DECIMAL(%d,%d)", precision, scale); + String beforeTheDecimalPoint = "12345678901234567890123456789012345678".substring(0, precision - scale); + String afterTheDecimalPoint = "09876543210987654321098765432109876543".substring(0, scale); + String decimalValue = format("%s.%s", beforeTheDecimalPoint, afterTheDecimalPoint); + + assertUpdate(session, format("CREATE TABLE %s (x %s) WITH (format = '%s')", tableName, decimalType, format.name())); + assertUpdate(session, format("INSERT INTO %s (x) VALUES (CAST('%s' AS %s))", tableName, decimalValue, decimalType), 1); + assertQuery(session, format("SELECT * FROM %s", tableName), format("SELECT CAST('%s' AS %s)", decimalValue, decimalType)); + dropTable(session, tableName); + } + + @Test + public void testParquetPartitionByTimestamp() { // TODO } @Test - public void testTimestamp() + public void testParquetSelectByTimestamp() { // TODO } @Test - public void testCreatePartitionedTable() + public void testOrcPartitionByTimestamp() { // TODO } + @Test + public void testOrcSelectByTimestamp() + { + // TODO + } + + @Test + public void testCreatePartitionedTable() + { + testWithAllFileFormats(this::testCreatePartitionedTable); + } + + private void testCreatePartitionedTable(Session session, FileFormat fileFormat) + { + @Language("SQL") String createTable = "" + + "CREATE TABLE test_partitioned_table (" + + " _string VARCHAR" + + ", _bigint BIGINT" + + ", _integer INTEGER" + + ", _real REAL" + + ", _double DOUBLE" + + ", _boolean BOOLEAN" + + ", _decimal_short DECIMAL(3,2)" + + ", _decimal_long DECIMAL(30,10)" + + ", _date DATE" + + ") " + + "WITH (" + + "format = '" + fileFormat + "', " + + "partitioning = ARRAY[" + + " '_string'," + + " '_integer'," + + " '_bigint'," + + " '_boolean'," + + " '_real'," + + " '_double'," + + " '_decimal_short', " + + " '_decimal_long'," + + " '_date']" + + ")"; + + assertUpdate(session, createTable); + + MaterializedResult result = computeActual("SELECT * from test_partitioned_table"); + 
assertEquals(result.getRowCount(), 0); + + @Language("SQL") String select = "" + + "SELECT" + + " 'foo' _string" + + ", CAST(123 AS BIGINT) _bigint" + + ", 456 _integer" + + ", CAST('123.45' AS REAL) _real" + + ", CAST('3.14' AS DOUBLE) _double" + + ", true _boolean" + + ", CAST('3.14' AS DECIMAL(3,2)) _decimal_short" + + ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _decimal_long" + + ", CAST('2017-05-01' AS DATE) _date"; + + assertUpdate(session, "INSERT INTO test_partitioned_table " + select, 1); + assertQuery(session, "SELECT * from test_partitioned_table", select); + assertQuery(session, "" + + "SELECT * FROM test_partitioned_table WHERE" + + " 'foo' = _string" + + " AND 456 = _integer" + + " AND CAST(123 AS BIGINT) = _bigint" + + " AND true = _boolean" + + " AND CAST('3.14' AS DECIMAL(3,2)) = _decimal_short" + + " AND CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) = _decimal_long" + + " AND CAST('2017-05-01' AS DATE) = _date", + select); + + dropTable(session, "test_partitioned_table"); + } + + @Test + public void testCreatePartitionedTableWithNestedTypes() + { + testWithAllFileFormats(this::testCreatePartitionedTableWithNestedTypes); + } + + private void testCreatePartitionedTableWithNestedTypes(Session session, FileFormat fileFormat) + { + @Language("SQL") String createTable = "" + + "CREATE TABLE test_partitioned_table_nested_type (" + + " _string VARCHAR" + + ", _struct ROW(_field1 INT, _field2 VARCHAR)" + + ", _date DATE" + + ") " + + "WITH (" + + "format = '" + fileFormat + "', " + + "partitioning = ARRAY['_date']" + + ")"; + + assertUpdate(session, createTable); + + dropTable(session, "test_partitioned_table_nested_type"); + } + + @Test + public void testPartitionedTableWithNullValues() + { + testWithAllFileFormats(this::testPartitionedTableWithNullValues); + } + + private void testPartitionedTableWithNullValues(Session session, FileFormat fileFormat) + { + @Language("SQL") String createTable = "" + + "CREATE TABLE 
test_partitioned_table_with_null_values (" + + " _string VARCHAR" + + ", _bigint BIGINT" + + ", _integer INTEGER" + + ", _real REAL" + + ", _double DOUBLE" + + ", _boolean BOOLEAN" + + ", _decimal_short DECIMAL(3,2)" + + ", _decimal_long DECIMAL(30,10)" + + ", _date DATE" + + ") " + + "WITH (" + + "format = '" + fileFormat + "', " + + "partitioning = ARRAY[" + + " '_string'," + + " '_integer'," + + " '_bigint'," + + " '_boolean'," + + " '_real'," + + " '_double'," + + " '_decimal_short', " + + " '_decimal_long'," + + " '_date']" + + ")"; + + assertUpdate(session, createTable); + + MaterializedResult result = computeActual("SELECT * from test_partitioned_table_with_null_values"); + assertEquals(result.getRowCount(), 0); + + @Language("SQL") String select = "" + + "SELECT" + + " null _string" + + ", null _bigint" + + ", null _integer" + + ", null _real" + + ", null _double" + + ", null _boolean" + + ", null _decimal_short" + + ", null _decimal_long" + + ", null _date"; + + assertUpdate(session, "INSERT INTO test_partitioned_table_with_null_values " + select, 1); + assertQuery(session, "SELECT * from test_partitioned_table_with_null_values", select); + dropTable(session, "test_partitioned_table_with_null_values"); + } + @Test public void testCreatePartitionedTableAs() + { + testWithAllFileFormats(this::testCreatePartitionedTableAs); + } + + private void testCreatePartitionedTableAs(Session session, FileFormat fileFormat) + { + @Language("SQL") String createTable = "" + + "CREATE TABLE test_create_partitioned_table_as " + + "WITH (" + + "format = '" + fileFormat + "', " + + "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(order_key,9)']" + + ") " + + "AS " + + "SELECT orderkey AS order_key, shippriority AS ship_priority, orderstatus AS order_status " + + "FROM tpch.tiny.orders"; + + assertUpdate(session, createTable, "SELECT count(*) from orders"); + + String createTableSql = format("" + + "CREATE TABLE %s.%s.%s (\n" + + " \"order_key\" bigint,\n" + + " 
\"ship_priority\" integer,\n" + + " \"order_status\" varchar\n" + + ")\n" + + "WITH (\n" + + " format = '" + fileFormat + "',\n" + + " partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" + + ")", + getSession().getCatalog().get(), + getSession().getSchema().get(), + "test_create_partitioned_table_as"); + + MaterializedResult actualResult = computeActual("SHOW CREATE TABLE test_create_partitioned_table_as"); + assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql); + + assertQuery(session, "SELECT * from test_create_partitioned_table_as", "SELECT orderkey, shippriority, orderstatus FROM orders"); + + dropTable(session, "test_create_partitioned_table_as"); + } + + @Test + public void testColumnComments() + { + Session session = getSession(); + assertUpdate(session, "CREATE TABLE test_column_comments (_bigint BIGINT COMMENT 'test column comment')"); + + assertQuery(session, "SHOW COLUMNS FROM test_column_comments", + "VALUES ('_bigint', 'bigint', '', 'test column comment')"); + + dropTable(session, "test_column_comments"); + } + + @Test + public void testTableComments() + { + Session session = getSession(); + String createTableTemplate = "" + + "CREATE TABLE iceberg.tpch.test_table_comments (\n" + + " \"_x\" bigint\n" + + ")\n" + + "COMMENT '%s'\n" + + "WITH (\n" + + " format = 'ORC'\n" + + ")"; + String createTableSql = format(createTableTemplate, "test table comment"); + assertUpdate(createTableSql); + MaterializedResult resultOfCreate = computeActual("SHOW CREATE TABLE test_table_comments"); + assertEquals(getOnlyElement(resultOfCreate.getOnlyColumnAsSet()), createTableSql); + + dropTable(session, "test_table_comments"); + } + + @Test + public void testRollbackSnapshot() + { + Session session = getSession(); + MaterializedResult result = computeActual("SHOW SCHEMAS FROM system"); + assertUpdate(session, "CREATE TABLE test_rollback (col0 INTEGER, col1 BIGINT)"); + long afterCreateTableId = getLatestSnapshotId(); + 
+ assertUpdate(session, "INSERT INTO test_rollback (col0, col1) VALUES (123, CAST(987 AS BIGINT))", 1); + long afterFirstInsertId = getLatestSnapshotId(); + + assertUpdate(session, "INSERT INTO test_rollback (col0, col1) VALUES (456, CAST(654 AS BIGINT))", 1); + assertQuery(session, "SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT)), (456, CAST(654 AS BIGINT))"); + + assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterFirstInsertId)); + assertQuery(session, "SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT))"); + + assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterCreateTableId)); + assertEquals((long) computeActual(session, "SELECT COUNT(*) FROM test_rollback").getOnlyValue(), 0); + + dropTable(session, "test_rollback"); + } + + private long getLatestSnapshotId() + { + return (long) computeActual("SELECT snapshot_id FROM \"test_rollback$snapshots\" ORDER BY committed_at DESC LIMIT 1") + .getOnlyValue(); + } + + @Test + public void testInsertIntoNotNullColumn() + { + // TODO: To support non-null column. (NOT_NULL_COLUMN_CONSTRAINT) + } + + @Test + public void testSchemaEvolution() + { + // TODO: Support schema evolution for PARQUET. Schema evolution should be id based. 
+ testSchemaEvolution(getSession(), FileFormat.ORC); + } + + private void testSchemaEvolution(Session session, FileFormat fileFormat) + { + assertUpdate(session, "CREATE TABLE test_schema_evolution_drop_end (col0 INTEGER, col1 INTEGER, col2 INTEGER) WITH (format = '" + fileFormat + "')"); + assertUpdate(session, "INSERT INTO test_schema_evolution_drop_end VALUES (0, 1, 2)", 1); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, 2)"); + assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_end DROP COLUMN col2"); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1)"); + assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_end ADD COLUMN col2 INTEGER"); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, NULL)"); + assertUpdate(session, "INSERT INTO test_schema_evolution_drop_end VALUES (3, 4, 5)", 1); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, NULL), (3, 4, 5)"); + dropTable(session, "test_schema_evolution_drop_end"); + + assertUpdate(session, "CREATE TABLE test_schema_evolution_drop_middle (col0 INTEGER, col1 INTEGER, col2 INTEGER) WITH (format = '" + fileFormat + "')"); + assertUpdate(session, "INSERT INTO test_schema_evolution_drop_middle VALUES (0, 1, 2)", 1); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 1, 2)"); + assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_middle DROP COLUMN col1"); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 2)"); + assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_middle ADD COLUMN col1 INTEGER"); + assertUpdate(session, "INSERT INTO test_schema_evolution_drop_middle VALUES (3, 4, 5)", 1); + assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 2, NULL), (3, 4, 5)"); + dropTable(session, "test_schema_evolution_drop_middle"); + } + + @Test + private 
void testCreateTableLike() + { + Session session = getSession(); + assertUpdate(session, "CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = 'PARQUET', partitioning = ARRAY['aDate'])"); + assertEquals(getTablePropertiesString("test_create_table_like_original"), "WITH (\n" + + " format = 'PARQUET',\n" + + " partitioning = ARRAY['adate']\n" + + ")"); + + assertUpdate(session, "CREATE TABLE test_create_table_like_copy0 (LIKE test_create_table_like_original, col2 INTEGER)"); + assertUpdate(session, "INSERT INTO test_create_table_like_copy0 (col1, aDate, col2) VALUES (1, CAST('1950-06-28' AS DATE), 3)", 1); + assertQuery(session, "SELECT * from test_create_table_like_copy0", "VALUES(1, CAST('1950-06-28' AS DATE), 3)"); + dropTable(session, "test_create_table_like_copy0"); + + assertUpdate(session, "CREATE TABLE test_create_table_like_copy1 (LIKE test_create_table_like_original)"); + assertEquals(getTablePropertiesString("test_create_table_like_copy1"), "WITH (\n" + + " format = 'PARQUET'\n" + + ")"); + dropTable(session, "test_create_table_like_copy1"); + + assertUpdate(session, "CREATE TABLE test_create_table_like_copy2 (LIKE test_create_table_like_original EXCLUDING PROPERTIES)"); + assertEquals(getTablePropertiesString("test_create_table_like_copy2"), "WITH (\n" + + " format = 'PARQUET'\n" + + ")"); + dropTable(session, "test_create_table_like_copy2"); + + assertUpdate(session, "CREATE TABLE test_create_table_like_copy3 (LIKE test_create_table_like_original INCLUDING PROPERTIES)"); + assertEquals(getTablePropertiesString("test_create_table_like_copy3"), "WITH (\n" + + " format = 'PARQUET',\n" + + " partitioning = ARRAY['adate']\n" + + ")"); + dropTable(session, "test_create_table_like_copy3"); + + assertUpdate(session, "CREATE TABLE test_create_table_like_copy4 (LIKE test_create_table_like_original INCLUDING PROPERTIES) WITH (format = 'ORC')"); + assertEquals(getTablePropertiesString("test_create_table_like_copy4"), "WITH (\n" + + " 
format = 'ORC',\n" + + " partitioning = ARRAY['adate']\n" + + ")"); + dropTable(session, "test_create_table_like_copy4"); + + dropTable(session, "test_create_table_like_original"); + } + + private String getTablePropertiesString(String tableName) + { + MaterializedResult showCreateTable = computeActual("SHOW CREATE TABLE " + tableName); + String createTable = (String) getOnlyElement(showCreateTable.getOnlyColumnAsSet()); + Matcher matcher = WITH_CLAUSE_EXTRACTER.matcher(createTable); + if (matcher.matches()) { + return matcher.group(1); + } + else { + return null; + } + } + + @Test + public void testPredicating() + { + testWithAllFileFormats(this::testPredicating); + } + + private void testPredicating(Session session, FileFormat fileFormat) + { + assertUpdate(session, "CREATE TABLE test_predicating_on_real (col REAL) WITH (format = '" + fileFormat + "')"); + assertUpdate(session, "INSERT INTO test_predicating_on_real VALUES 1.2", 1); + assertQuery(session, "SELECT * FROM test_predicating_on_real WHERE col = 1.2", "VALUES 1.2"); + dropTable(session, "test_predicating_on_real"); + } + + @Test + public void testDateTransforms() { // TODO } + + @Test + public void testTruncateTransform() + { + testWithAllFileFormats(this::testTruncateTransformsForFormat); + } + + private void testTruncateTransformsForFormat(Session session, FileFormat format) + { + String select = "SELECT d_trunc, row_count, d.min AS d_min, d.max AS d_max, b.min AS b_min, b.max AS b_max FROM \"test_truncate_transform$partitions\""; + + assertUpdate(session, format("CREATE TABLE test_truncate_transform (d VARCHAR, b BIGINT)" + + " WITH (format = '%s', partitioning = ARRAY['truncate(d, 2)'])", format.name())); + + String insertSql = "INSERT INTO test_truncate_transform VALUES" + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('moscow', 5)," + + "('Greece', 6)," + + "('Grozny', 7)"; + assertUpdate(session, insertSql, 7); + + assertQuery(session, "SELECT COUNT(*) FROM 
\"test_truncate_transform$partitions\"", "SELECT 3"); + + assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'ab'", "SELECT b FROM (VALUES (1), (2), (3)) AS t(b)"); + assertQuery(session, select + " WHERE d_trunc = 'ab'", "VALUES('ab', 3, 'ab598', 'abxy', 1, 3)"); + + assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'mo'", "SELECT b FROM (VALUES (4), (5)) AS t(b)"); + assertQuery(session, select + " WHERE d_trunc = 'mo'", "VALUES('mo', 2, 'mommy', 'moscow', 4, 5)"); + + assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'Gr'", "SELECT b FROM (VALUES (6), (7)) AS t(b)"); + assertQuery(session, select + " WHERE d_trunc = 'Gr'", "VALUES('Gr', 2, 'Greece', 'Grozny', 6, 7)"); + + dropTable(session, "test_truncate_transform"); + } + + @Test + public void testBucketTransform() + { + testWithAllFileFormats(this::testBucketTransformsForFormat); + } + + private void testBucketTransformsForFormat(Session session, FileFormat format) + { + String select = "SELECT d_bucket, row_count, d.min AS d_min, d.max AS d_max, b.min AS b_min, b.max AS b_max FROM \"test_bucket_transform$partitions\""; + + assertUpdate(session, format("CREATE TABLE test_bucket_transform (d VARCHAR, b BIGINT)" + + " WITH (format = '%s', partitioning = ARRAY['bucket(d, 2)'])", format.name())); + String insertSql = "INSERT INTO test_bucket_transform VALUES" + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('moscow', 5)," + + "('Greece', 6)," + + "('Grozny', 7)"; + assertUpdate(session, insertSql, 7); + + assertQuery(session, "SELECT COUNT(*) FROM \"test_bucket_transform$partitions\"", "SELECT 2"); + + assertQuery(session, select + " WHERE d_bucket = 0", "VALUES(0, 3, 'Grozny', 'mommy', 1, 7)"); + + assertQuery(session, select + " WHERE d_bucket = 1", "VALUES(1, 4, 'Greece', 'moscow', 2, 6)"); + + dropTable(session, "test_bucket_transform"); + } + + private void 
testWithAllFileFormats(BiConsumer test) + { + test.accept(getSession(), FileFormat.PARQUET); + test.accept(getSession(), FileFormat.ORC); + } + + private void dropTable(Session session, String table) + { + assertUpdate(session, "DROP TABLE " + table); + assertFalse(getQueryRunner().tableExists(session, table)); + } + @Test + public void testCreateNestedPartitionedTable() + { + testWithAllFileFormats(this::testCreateNestedPartitionedTable); + } + + public void testCreateNestedPartitionedTable(Session session, FileFormat fileFormat) + { + @Language("SQL") String createTable = "" + + "CREATE TABLE test_nested_table (" + + " bool BOOLEAN" + + ", int INTEGER" + + ", arr ARRAY(VARCHAR)" + + ", big BIGINT" + + ", rl REAL" + + ", dbl DOUBLE" + + ", mp MAP(INTEGER, VARCHAR)" + + ", dec DECIMAL(5,2)" + + ", vc VARCHAR" + + ", vb VARBINARY" + + ", str ROW(id INTEGER , vc VARCHAR)" + + ", dt DATE)" + + " WITH (partitioning = ARRAY['int']," + + " format = '" + fileFormat + "'" + + ")"; + + assertUpdate(session, createTable); + + assertUpdate(session, "INSERT INTO test_nested_table " + + " select true, 1, array['uno', 'dos', 'tres'], BIGINT '1', REAL '1.0', DOUBLE '1.0', map(array[1,2,3,4], array['ek','don','teen','char'])," + + " CAST(1.0 as DECIMAL(5,2))," + + " 'one', VARBINARY 'binary0/1values',\n" + + " (CAST(ROW(null, 'this is a random value') AS ROW(int, varchar))), current_date", 1); + MaterializedResult result = computeActual("SELECT * from test_nested_table"); + assertEquals(result.getRowCount(), 1); + + dropTable(session, "test_nested_table"); + + @Language("SQL") String createTable2 = "" + + "CREATE TABLE test_nested_table (" + + " int INTEGER" + + ", arr ARRAY(ROW(id INTEGER, vc VARCHAR))" + + ", big BIGINT" + + ", rl REAL" + + ", dbl DOUBLE" + + ", mp MAP(INTEGER, ARRAY(VARCHAR))" + + ", dec DECIMAL(5,2)" + + ", str ROW(id INTEGER, vc VARCHAR, arr ARRAY(INTEGER))" + + ", vc VARCHAR)" + + " WITH (partitioning = ARRAY['int']," + + " format = '" + fileFormat + "'" + + 
")"; + + assertUpdate(session, createTable2); + + assertUpdate(session, "INSERT INTO test_nested_table " + + " select 1, array[cast(row(1, null) as row(int, varchar)), cast(row(2, 'dos') as row(int, varchar))], BIGINT '1', REAL '1.0', DOUBLE '1.0', " + + "map(array[1,2], array[array['ek', 'one'], array['don', 'do', 'two']]), CAST(1.0 as DECIMAL(5,2)), " + + "CAST(ROW(1, 'this is a random value', null) AS ROW(int, varchar, array(int))), 'one'", 1); + result = computeActual("SELECT * from test_nested_table"); + assertEquals(result.getRowCount(), 1); + + @Language("SQL") String createTable3 = "" + + "CREATE TABLE test_nested_table2 WITH (partitioning = ARRAY['int']) as select * from test_nested_table"; + + assertUpdate(session, createTable3, 1); + + result = computeActual("SELECT * from test_nested_table2"); + assertEquals(result.getRowCount(), 1); + + dropTable(session, "test_nested_table"); + dropTable(session, "test_nested_table2"); + } } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java new file mode 100644 index 0000000000000..9627435fb5645 --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java @@ -0,0 +1,182 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.Session; +import com.facebook.presto.testing.MaterializedResult; +import com.facebook.presto.testing.MaterializedRow; +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import com.facebook.presto.tests.DistributedQueryRunner; +import com.google.common.collect.ImmutableMap; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.nio.file.Path; +import java.time.LocalDate; +import java.util.Map; +import java.util.function.Function; + +import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; +import static com.facebook.presto.testing.TestingSession.testSessionBuilder; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static org.testng.Assert.assertEquals; + +public class TestIcebergSystemTables + extends AbstractTestQueryFramework +{ + private static final int DEFAULT_PRECISION = 5; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + Session session = testSessionBuilder() + .setCatalog(ICEBERG_CATALOG) + .build(); + DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); + + Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data"); + + queryRunner.installPlugin(new IcebergPlugin()); + Map icebergProperties = ImmutableMap.builder() + .put("hive.metastore", "file") + .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog") + .build(); + + queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties); + + return queryRunner; + } + + @BeforeClass + public void setUp() + { + assertUpdate("CREATE SCHEMA test_schema"); + assertUpdate("CREATE TABLE test_schema.test_table (_bigint BIGINT, _date DATE) WITH (partitioning = ARRAY['_date'])"); + assertUpdate("INSERT INTO test_schema.test_table VALUES (0, 
CAST('2019-09-08' AS DATE)), (1, CAST('2019-09-09' AS DATE)), (2, CAST('2019-09-09' AS DATE))", 3); + assertUpdate("INSERT INTO test_schema.test_table VALUES (3, CAST('2019-09-09' AS DATE)), (4, CAST('2019-09-10' AS DATE)), (5, CAST('2019-09-10' AS DATE))", 3); + assertQuery("SELECT count(*) FROM test_schema.test_table", "VALUES 6"); + + assertUpdate("CREATE TABLE test_schema.test_table_multilevel_partitions (_varchar VARCHAR, _bigint BIGINT, _date DATE) WITH (partitioning = ARRAY['_bigint', '_date'])"); + assertUpdate("INSERT INTO test_schema.test_table_multilevel_partitions VALUES ('a', 0, CAST('2019-09-08' AS DATE)), ('a', 1, CAST('2019-09-08' AS DATE)), ('a', 0, CAST('2019-09-09' AS DATE))", 3); + assertQuery("SELECT count(*) FROM test_schema.test_table_multilevel_partitions", "VALUES 3"); + } + + @Test + public void testPartitionTable() + { + assertQuery("SELECT count(*) FROM test_schema.test_table", "VALUES 6"); + assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$partitions\"", + "VALUES ('_date', 'date', '', '')," + + "('row_count', 'bigint', '', '')," + + "('file_count', 'bigint', '', '')," + + "('total_size', 'bigint', '', '')," + + "('_bigint', 'row(\"min\" bigint, \"max\" bigint, \"null_count\" bigint)', '', '')"); + + MaterializedResult result = computeActual("SELECT * from test_schema.\"test_table$partitions\""); + assertEquals(result.getRowCount(), 3); + + Map rowsByPartition = result.getMaterializedRows().stream() + .collect(toImmutableMap(row -> (LocalDate) row.getField(0), Function.identity())); + + // Test if row counts are computed correctly + assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-08")).getField(1), 1L); + assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-09")).getField(1), 3L); + assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-10")).getField(1), 2L); + + // Test if min/max values and null value count are computed correctly. 
+ assertEquals( + rowsByPartition.get(LocalDate.parse("2019-09-08")).getField(4), + new MaterializedRow(DEFAULT_PRECISION, 0L, 0L, 0L).getFields()); + assertEquals( + rowsByPartition.get(LocalDate.parse("2019-09-09")).getField(4), + new MaterializedRow(DEFAULT_PRECISION, 1L, 3L, 0L).getFields()); + assertEquals( + rowsByPartition.get(LocalDate.parse("2019-09-10")).getField(4), + new MaterializedRow(DEFAULT_PRECISION, 4L, 5L, 0L).getFields()); + } + + @Test + public void testHistoryTable() + { + assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$history\"", + "VALUES ('made_current_at', 'timestamp with time zone', '', '')," + + "('snapshot_id', 'bigint', '', '')," + + "('parent_id', 'bigint', '', '')," + + "('is_current_ancestor', 'boolean', '', '')"); + + // Test the number of history entries + assertQuery("SELECT count(*) FROM test_schema.\"test_table$history\"", "VALUES 3"); + } + + @Test + public void testSnapshotsTable() + { + assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$snapshots\"", + "VALUES ('committed_at', 'timestamp with time zone', '', '')," + + "('snapshot_id', 'bigint', '', '')," + + "('parent_id', 'bigint', '', '')," + + "('operation', 'varchar', '', '')," + + "('manifest_list', 'varchar', '', '')," + + "('summary', 'map(varchar, varchar)', '', '')"); + + assertQuery("SELECT operation FROM test_schema.\"test_table$snapshots\"", "VALUES 'append', 'append', 'append'"); + assertQuery("SELECT summary['total-records'] FROM test_schema.\"test_table$snapshots\"", "VALUES '0', '3', '6'"); + } + + @Test + public void testManifestsTable() + { + assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$manifests\"", + "VALUES ('path', 'varchar', '', '')," + + "('length', 'bigint', '', '')," + + "('partition_spec_id', 'integer', '', '')," + + "('added_snapshot_id', 'bigint', '', '')," + + "('added_data_files_count', 'integer', '', '')," + + "('existing_data_files_count', 'integer', '', '')," + + "('deleted_data_files_count', 'integer', '', '')," + + 
"('partitions', 'array(row(\"contains_null\" boolean, \"lower_bound\" varchar, \"upper_bound\" varchar))', '', '')"); + assertQuerySucceeds("SELECT * FROM test_schema.\"test_table$manifests\""); + + assertQuerySucceeds("SELECT * FROM test_schema.\"test_table_multilevel_partitions$manifests\""); + } + + @Test + public void testFilesTable() + { + assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$files\"", + "VALUES ('file_path', 'varchar', '', '')," + + "('file_format', 'varchar', '', '')," + + "('record_count', 'bigint', '', '')," + + "('file_size_in_bytes', 'bigint', '', '')," + + "('column_sizes', 'map(integer, bigint)', '', '')," + + "('value_counts', 'map(integer, bigint)', '', '')," + + "('null_value_counts', 'map(integer, bigint)', '', '')," + + "('lower_bounds', 'map(integer, varchar)', '', '')," + + "('upper_bounds', 'map(integer, varchar)', '', '')," + + "('key_metadata', 'varbinary', '', '')," + + "('split_offsets', 'array(bigint)', '', '')"); + assertQuerySucceeds("SELECT * FROM test_schema.\"test_table$files\""); + } + + @AfterClass(alwaysRun = true) + public void tearDown() + { + assertUpdate("DROP TABLE IF EXISTS test_schema.test_table"); + assertUpdate("DROP TABLE IF EXISTS test_schema.test_table_multilevel_partitions"); + assertUpdate("DROP SCHEMA IF EXISTS test_schema"); + } +} diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java index 8fa45ce800b96..b69bfb18decb4 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java @@ -144,6 +144,9 @@ public class OrcWriter private long previouslyRecordedSizeInBytes; private boolean closed; + private long numberOfRows; + private List unencryptedStats; + @Nullable private final OrcWriteValidation.OrcWriteValidationBuilder validationBuilder; @@ -162,6 +165,41 @@ public OrcWriter( boolean validate, OrcWriteValidationMode 
validationMode, WriterStats stats) + { + this( + dataSink, + columnNames, + types, + Optional.empty(), + orcEncoding, + compressionKind, + encryption, + dwrfEncryptionProvider, + options, + dwrfOptions, + userMetadata, + hiveStorageTimeZone, + validate, + validationMode, + stats); + } + + public OrcWriter( + DataSink dataSink, + List columnNames, + List types, + Optional> inputOrcTypes, + OrcEncoding orcEncoding, + CompressionKind compressionKind, + Optional encryption, + DwrfEncryptionProvider dwrfEncryptionProvider, + OrcWriterOptions options, + Optional dwrfOptions, + Map userMetadata, + DateTimeZone hiveStorageTimeZone, + boolean validate, + OrcWriteValidationMode validationMode, + WriterStats stats) { this.validationBuilder = validate ? new OrcWriteValidation.OrcWriteValidationBuilder(validationMode, types).setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null; @@ -199,7 +237,8 @@ public OrcWriter( this.stats = requireNonNull(stats, "stats is null"); requireNonNull(columnNames, "columnNames is null"); - this.orcTypes = OrcType.createOrcRowType(0, columnNames, types); + requireNonNull(inputOrcTypes, "inputOrcTypes is null"); + this.orcTypes = inputOrcTypes.orElseGet(() -> OrcType.createOrcRowType(0, columnNames, types)); recordValidation(validation -> validation.setColumnNames(columnNames)); dwrfWriterEncryption = requireNonNull(encryption, "encryption is null"); @@ -646,7 +685,7 @@ private List bufferFileFooter() Slice metadataSlice = metadataWriter.writeMetadata(metadata); outputData.add(createDataOutput(metadataSlice)); - long numberOfRows = closedStripes.stream() + numberOfRows = closedStripes.stream() .mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()) .sum(); @@ -660,7 +699,7 @@ private List bufferFileFooter() Map userMetadata = this.userMetadata.entrySet().stream() .collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue()))); - List unencryptedStats = new ArrayList<>(); + 
unencryptedStats = new ArrayList<>(); Map> encryptedStats = new HashMap<>(); addStatsRecursive(fileStats, 0, new HashMap<>(), unencryptedStats, encryptedStats); Optional dwrfEncryption; @@ -807,6 +846,18 @@ public void validate(OrcDataSource input) DwrfKeyProvider.of(intermediateKeyMetadata.build())); } + public long getFileRowCount() + { + checkState(closed, "File row count is not available until the writing has finished"); + return numberOfRows; + } + + public List getFileStats() + { + checkState(closed, "File statistics are not available until the writing has finished"); + return unencryptedStats; + } + private static List toDenseList(Map data, int expectedSize) { checkArgument(data.size() == expectedSize); diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java index ababc8c4bee5a..1482ec5077531 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java @@ -492,7 +492,7 @@ private static OrcType toType(OrcProto.Type type) precision = Optional.of(type.getPrecision()); scale = Optional.of(type.getScale()); } - return new OrcType(toTypeKind(type.getKind()), type.getSubtypesList(), type.getFieldNamesList(), length, precision, scale); + return new OrcType(toTypeKind(type.getKind()), type.getSubtypesList(), type.getFieldNamesList(), length, precision, scale, toMap(type.getAttributesList())); } private static List toType(List types) @@ -546,6 +546,20 @@ private static OrcTypeKind toTypeKind(OrcProto.Type.Kind typeKind) } } + // This method assumes type attributes have no duplicate key + private static Map toMap(List attributes) + { + ImmutableMap.Builder results = new ImmutableMap.Builder<>(); + if (attributes != null) { + for (OrcProto.StringPair attribute : attributes) { + if (attribute.hasKey() && attribute.hasValue()) { + 
results.put(attribute.getKey(), attribute.getValue()); + } + } + } + return results.build(); + } + private static StreamKind toStreamKind(OrcProto.Stream.Kind streamKind) { switch (streamKind) { diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java index f14b5d221132e..c519df4d36726 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java @@ -33,10 +33,12 @@ import java.io.IOException; import java.io.OutputStream; import java.util.List; +import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.Math.toIntExact; import static java.util.stream.Collectors.toList; @@ -145,7 +147,8 @@ private static Type toType(OrcType type) Builder builder = Type.newBuilder() .setKind(toTypeKind(type.getOrcTypeKind())) .addAllSubtypes(type.getFieldTypeIndexes()) - .addAllFieldNames(type.getFieldNames()); + .addAllFieldNames(type.getFieldNames()) + .addAllAttributes(toStringPairList(type.getAttributes())); if (type.getLength().isPresent()) { builder.setMaximumLength(type.getLength().get()); @@ -202,6 +205,16 @@ private static OrcProto.Type.Kind toTypeKind(OrcTypeKind orcTypeKind) throw new IllegalArgumentException("Unsupported type: " + orcTypeKind); } + private static List toStringPairList(Map attributes) + { + return attributes.entrySet().stream() + .map(entry -> OrcProto.StringPair.newBuilder() + .setKey(entry.getKey()) + .setValue(entry.getValue()) + .build()) + .collect(toImmutableList()); + } + private static OrcProto.ColumnStatistics toColumnStatistics(ColumnStatistics columnStatistics) { OrcProto.ColumnStatistics.Builder builder = 
OrcProto.ColumnStatistics.newBuilder(); diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java index 097cb2d3ebb42..94b57418afe68 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java @@ -20,9 +20,11 @@ import com.facebook.presto.common.type.TypeSignatureParameter; import com.facebook.presto.common.type.VarcharType; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import static com.facebook.presto.common.type.BigintType.BIGINT; @@ -79,6 +81,7 @@ public enum OrcTypeKind private final Optional length; private final Optional precision; private final Optional scale; + private final Map attributes; private OrcType(OrcTypeKind orcTypeKind) { @@ -101,6 +104,11 @@ private OrcType(OrcTypeKind orcTypeKind, List fieldTypeIndexes, List fieldTypeIndexes, List fieldNames, Optional length, Optional precision, Optional scale) + { + this(orcTypeKind, fieldTypeIndexes, fieldNames, length, precision, scale, ImmutableMap.of()); + } + + public OrcType(OrcTypeKind orcTypeKind, List fieldTypeIndexes, List fieldNames, Optional length, Optional precision, Optional scale, Map attributes) { this.orcTypeKind = requireNonNull(orcTypeKind, "typeKind is null"); this.fieldTypeIndexes = ImmutableList.copyOf(requireNonNull(fieldTypeIndexes, "fieldTypeIndexes is null")); @@ -114,6 +122,7 @@ public OrcType(OrcTypeKind orcTypeKind, List fieldTypeIndexes, List getScale() return scale; } + public Map getAttributes() + { + return attributes; + } + @Override public String toString() { diff --git a/presto-server/pom.xml b/presto-server/pom.xml index 5f597a7138203..1c67432bc23b6 100644 --- a/presto-server/pom.xml +++ b/presto-server/pom.xml @@ -307,6 
+307,14 @@ zip provided + + + com.facebook.presto + presto-iceberg + ${project.version} + zip + provided + diff --git a/presto-server/src/main/assembly/presto.xml b/presto-server/src/main/assembly/presto.xml index f135ceff45dfc..563ee2ed2c061 100644 --- a/presto-server/src/main/assembly/presto.xml +++ b/presto-server/src/main/assembly/presto.xml @@ -184,5 +184,9 @@ ${project.build.directory}/dependency/presto-bigquery-${project.version} plugin/presto-bigquery + + ${project.build.directory}/dependency/presto-iceberg-${project.version} + plugin/iceberg +