diff --git a/pom.xml b/pom.xml
index ffe041931075b..880cff4ec3161 100644
--- a/pom.xml
+++ b/pom.xml
@@ -608,7 +608,7 @@
com.facebook.presto.orc
orc-protobuf
- 11
+ 12
diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java
index de390c2f0aa5a..566fa5f3ac8c3 100644
--- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java
+++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/HiveType.java
@@ -194,7 +194,7 @@ public static List<HiveType> toHiveTypes(String hiveTypes)
.collect(toList()));
}
- private static HiveType toHiveType(TypeInfo typeInfo)
+ public static HiveType toHiveType(TypeInfo typeInfo)
{
requireNonNull(typeInfo, "typeInfo is null");
return new HiveType(typeInfo);
diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java
index 58cef8f596730..a0e7eecbc5acf 100644
--- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java
+++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java
@@ -432,7 +432,7 @@ public synchronized void replaceTable(MetastoreContext metastoreContext, String
checkArgument(!newTable.getTableType().equals(TEMPORARY_TABLE), "temporary tables must never be stored in the metastore");
Table table = getRequiredTable(metastoreContext, databaseName, tableName);
- if (!table.getTableType().equals(VIRTUAL_VIEW) || !newTable.getTableType().equals(VIRTUAL_VIEW)) {
+ if ((!table.getTableType().equals(VIRTUAL_VIEW) || !newTable.getTableType().equals(VIRTUAL_VIEW)) && !isIcebergTable(table.getParameters())) {
throw new PrestoException(HIVE_METASTORE_ERROR, "Only views can be updated with replaceTable");
}
if (!table.getDatabaseName().equals(databaseName) || !table.getTableName().equals(tableName)) {
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java
index 8752c5e7bfcc3..695951e310380 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java
@@ -29,6 +29,7 @@
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.WriterStats;
import com.facebook.presto.orc.metadata.CompressionKind;
+import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
import org.joda.time.DateTimeZone;
@@ -58,7 +59,7 @@ public class OrcFileWriter
private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcFileWriter.class).instanceSize();
private static final ThreadMXBean THREAD_MX_BEAN = ManagementFactory.getThreadMXBean();
- private final OrcWriter orcWriter;
+ protected final OrcWriter orcWriter;
private final Callable<Void> rollbackAction;
private final int[] fileInputColumnIndexes;
private final List<Block> nullBlocks;
@@ -83,6 +84,43 @@ public OrcFileWriter(
WriterStats stats,
DwrfEncryptionProvider dwrfEncryptionProvider,
Optional<DwrfWriterEncryption> dwrfWriterEncryption)
+ {
+ this(
+ dataSink,
+ rollbackAction,
+ orcEncoding,
+ columnNames,
+ fileColumnTypes,
+ Optional.empty(),
+ compression,
+ options,
+ fileInputColumnIndexes,
+ metadata,
+ hiveStorageTimeZone,
+ validationInputFactory,
+ validationMode,
+ stats,
+ dwrfEncryptionProvider,
+ dwrfWriterEncryption);
+ }
+
+ public OrcFileWriter(
+ DataSink dataSink,
+ Callable<Void> rollbackAction,
+ OrcEncoding orcEncoding,
+ List<String> columnNames,
+ List<Type> fileColumnTypes,
+ Optional<List<OrcType>> fileColumnOrcTypes,
+ CompressionKind compression,
+ OrcWriterOptions options,
+ int[] fileInputColumnIndexes,
+ Map<String, String> metadata,
+ DateTimeZone hiveStorageTimeZone,
+ Optional<Supplier<OrcDataSource>> validationInputFactory,
+ OrcWriteValidationMode validationMode,
+ WriterStats stats,
+ DwrfEncryptionProvider dwrfEncryptionProvider,
+ Optional<DwrfWriterEncryption> dwrfWriterEncryption)
{
requireNonNull(dataSink, "dataSink is null");
@@ -91,6 +129,7 @@ public OrcFileWriter(
dataSink,
columnNames,
fileColumnTypes,
+ fileColumnOrcTypes,
orcEncoding,
compression,
dwrfWriterEncryption,
diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml
index 3df057236cac8..5cf4e4fc8a0f4 100644
--- a/presto-iceberg/pom.xml
+++ b/presto-iceberg/pom.xml
@@ -21,11 +21,6 @@
concurrent
-
- com.facebook.presto
- presto-client
-
-
com.facebook.presto
presto-hive-common
@@ -104,6 +99,11 @@
+
+ com.facebook.presto
+ presto-orc
+
+
com.facebook.presto
presto-plugin-toolkit
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java
new file mode 100644
index 0000000000000..898848c7a2b4f
--- /dev/null
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/FilesTable.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.common.Page;
+import com.facebook.presto.common.predicate.TupleDomain;
+import com.facebook.presto.common.type.ArrayType;
+import com.facebook.presto.common.type.StandardTypes;
+import com.facebook.presto.common.type.TypeManager;
+import com.facebook.presto.common.type.TypeSignatureParameter;
+import com.facebook.presto.iceberg.util.PageListBuilder;
+import com.facebook.presto.spi.ColumnMetadata;
+import com.facebook.presto.spi.ConnectorPageSource;
+import com.facebook.presto.spi.ConnectorSession;
+import com.facebook.presto.spi.ConnectorTableMetadata;
+import com.facebook.presto.spi.FixedPageSource;
+import com.facebook.presto.spi.SchemaTableName;
+import com.facebook.presto.spi.SystemTable;
+import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import io.airlift.slice.Slices;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableScan;
+import org.apache.iceberg.transforms.Transforms;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.common.type.IntegerType.INTEGER;
+import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
+import static com.facebook.presto.common.type.VarcharType.VARCHAR;
+import static com.facebook.presto.iceberg.IcebergUtil.getTableScan;
+import static com.facebook.presto.iceberg.util.PageListBuilder.forTable;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static java.util.Objects.requireNonNull;
+
+public class FilesTable
+ implements SystemTable
+{
+ private final ConnectorTableMetadata tableMetadata;
+ private final Table icebergTable;
+ private final Optional<Long> snapshotId;
+
+ public FilesTable(SchemaTableName tableName, Table icebergTable, Optional<Long> snapshotId, TypeManager typeManager)
+ {
+ this.icebergTable = requireNonNull(icebergTable, "icebergTable is null");
+
+ tableMetadata = new ConnectorTableMetadata(requireNonNull(tableName, "tableName is null"),
+ ImmutableList.<ColumnMetadata>builder()
+ .add(new ColumnMetadata("file_path", VARCHAR))
+ .add(new ColumnMetadata("file_format", VARCHAR))
+ .add(new ColumnMetadata("record_count", BIGINT))
+ .add(new ColumnMetadata("file_size_in_bytes", BIGINT))
+ .add(new ColumnMetadata("column_sizes", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(INTEGER.getTypeSignature()),
+ TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
+ .add(new ColumnMetadata("value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(INTEGER.getTypeSignature()),
+ TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
+ .add(new ColumnMetadata("null_value_counts", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(INTEGER.getTypeSignature()),
+ TypeSignatureParameter.of(BIGINT.getTypeSignature())))))
+ .add(new ColumnMetadata("lower_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(INTEGER.getTypeSignature()),
+ TypeSignatureParameter.of(VARCHAR.getTypeSignature())))))
+ .add(new ColumnMetadata("upper_bounds", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(INTEGER.getTypeSignature()),
+ TypeSignatureParameter.of(VARCHAR.getTypeSignature())))))
+ .add(new ColumnMetadata("key_metadata", VARBINARY))
+ .add(new ColumnMetadata("split_offsets", new ArrayType(BIGINT)))
+ .build());
+ this.snapshotId = requireNonNull(snapshotId, "snapshotId is null");
+ }
+
+ @Override
+ public Distribution getDistribution()
+ {
+ return Distribution.SINGLE_COORDINATOR;
+ }
+
+ @Override
+ public ConnectorTableMetadata getTableMetadata()
+ {
+ return tableMetadata;
+ }
+
+ @Override
+ public ConnectorPageSource pageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain<Integer> constraint)
+ {
+ return new FixedPageSource(buildPages(tableMetadata, session, icebergTable, snapshotId));
+ }
+
+ private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, ConnectorSession session, Table icebergTable, Optional<Long> snapshotId)
+ {
+ PageListBuilder pagesBuilder = forTable(tableMetadata);
+ TableScan tableScan = getTableScan(TupleDomain.all(), snapshotId, icebergTable).includeColumnStats();
+ Map<Integer, Type> idToTypeMap = getIdToTypeMap(icebergTable.schema());
+
+ tableScan.planFiles().forEach(fileScanTask -> {
+ DataFile dataFile = fileScanTask.file();
+ pagesBuilder.beginRow();
+ pagesBuilder.appendVarchar(dataFile.path().toString());
+ pagesBuilder.appendVarchar(dataFile.format().name());
+ pagesBuilder.appendBigint(dataFile.recordCount());
+ pagesBuilder.appendBigint(dataFile.fileSizeInBytes());
+ if (checkNonNull(dataFile.columnSizes(), pagesBuilder)) {
+ pagesBuilder.appendIntegerBigintMap(dataFile.columnSizes());
+ }
+ if (checkNonNull(dataFile.valueCounts(), pagesBuilder)) {
+ pagesBuilder.appendIntegerBigintMap(dataFile.valueCounts());
+ }
+ if (checkNonNull(dataFile.nullValueCounts(), pagesBuilder)) {
+ pagesBuilder.appendIntegerBigintMap(dataFile.nullValueCounts());
+ }
+ if (checkNonNull(dataFile.lowerBounds(), pagesBuilder)) {
+ pagesBuilder.appendIntegerVarcharMap(dataFile.lowerBounds().entrySet().stream()
+ .collect(toImmutableMap(
+ Map.Entry::getKey,
+ entry -> Transforms.identity(idToTypeMap.get(entry.getKey())).toHumanString(
+ Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue())))));
+ }
+ if (checkNonNull(dataFile.upperBounds(), pagesBuilder)) {
+ pagesBuilder.appendIntegerVarcharMap(dataFile.upperBounds().entrySet().stream()
+ .collect(toImmutableMap(
+ Map.Entry::getKey,
+ entry -> Transforms.identity(idToTypeMap.get(entry.getKey())).toHumanString(
+ Conversions.fromByteBuffer(idToTypeMap.get(entry.getKey()), entry.getValue())))));
+ }
+ if (checkNonNull(dataFile.keyMetadata(), pagesBuilder)) {
+ pagesBuilder.appendVarbinary(Slices.wrappedBuffer(dataFile.keyMetadata()));
+ }
+ if (checkNonNull(dataFile.splitOffsets(), pagesBuilder)) {
+ pagesBuilder.appendBigintArray(dataFile.splitOffsets());
+ }
+ pagesBuilder.endRow();
+ });
+
+ return pagesBuilder.build();
+ }
+
+ private static Map<Integer, Type> getIdToTypeMap(Schema schema)
+ {
+ ImmutableMap.Builder<Integer, Type> idToTypeMap = ImmutableMap.builder();
+ for (Types.NestedField field : schema.columns()) {
+ populateIdToTypeMap(field, idToTypeMap);
+ }
+ return idToTypeMap.build();
+ }
+
+ private static void populateIdToTypeMap(Types.NestedField field, ImmutableMap.Builder<Integer, Type> idToTypeMap)
+ {
+ Type type = field.type();
+ idToTypeMap.put(field.fieldId(), type);
+ if (type instanceof Type.NestedType) {
+ type.asNestedType().fields().forEach(child -> populateIdToTypeMap(child, idToTypeMap));
+ }
+ }
+
+ private static boolean checkNonNull(Object object, PageListBuilder pagesBuilder)
+ {
+ if (object == null) {
+ pagesBuilder.appendNull();
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java
index 10c78108078d5..c7fcd5904ab65 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergFileWriterFactory.java
@@ -13,14 +13,26 @@
*/
package com.facebook.presto.iceberg;
+import com.facebook.presto.common.io.DataSink;
+import com.facebook.presto.common.io.OutputStreamDataSink;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.FileFormatDataSourceStats;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
+import com.facebook.presto.hive.HiveDwrfEncryptionProvider;
+import com.facebook.presto.hive.HiveSessionProperties;
+import com.facebook.presto.hive.NodeVersion;
+import com.facebook.presto.hive.OrcFileWriterConfig;
+import com.facebook.presto.hive.orc.HdfsOrcDataSource;
+import com.facebook.presto.orc.DwrfEncryptionProvider;
+import com.facebook.presto.orc.OrcDataSource;
+import com.facebook.presto.orc.OrcDataSourceId;
+import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.parquet.writer.ParquetWriterOptions;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
+import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
@@ -32,35 +44,58 @@
import java.io.IOException;
import java.util.List;
+import java.util.Optional;
import java.util.concurrent.Callable;
+import java.util.function.Supplier;
import java.util.stream.IntStream;
+import static com.facebook.presto.hive.HiveMetadata.PRESTO_VERSION_NAME;
import static com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize;
import static com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize;
+import static com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR;
+import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITE_VALIDATION_FAILED;
import static com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcOptimizedWriterValidateMode;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcOptimizedWriterValidate;
+import static com.facebook.presto.iceberg.TypeConverter.toOrcType;
import static com.facebook.presto.iceberg.TypeConverter.toPrestoType;
import static com.facebook.presto.iceberg.util.PrimitiveTypeMapBuilder.makeTypeMap;
+import static com.facebook.presto.orc.OrcEncoding.ORC;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.parquet.ParquetSchemaUtil.convert;
+import static org.joda.time.DateTimeZone.UTC;
public class IcebergFileWriterFactory
{
private final HdfsEnvironment hdfsEnvironment;
private final TypeManager typeManager;
private final FileFormatDataSourceStats readStats;
+ private final NodeVersion nodeVersion;
+ private final OrcWriterStats orcWriterStats = new OrcWriterStats();
+ private final OrcFileWriterConfig orcFileWriterConfig;
+ private final DwrfEncryptionProvider dwrfEncryptionProvider;
@Inject
public IcebergFileWriterFactory(
HdfsEnvironment hdfsEnvironment,
TypeManager typeManager,
- FileFormatDataSourceStats readStats)
+ FileFormatDataSourceStats readStats,
+ NodeVersion nodeVersion,
+ OrcFileWriterConfig orcFileWriterConfig,
+ HiveDwrfEncryptionProvider dwrfEncryptionProvider)
{
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.readStats = requireNonNull(readStats, "readStats is null");
+ this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
+ this.orcFileWriterConfig = requireNonNull(orcFileWriterConfig, "orcFileWriterConfig is null");
+ this.dwrfEncryptionProvider = requireNonNull(dwrfEncryptionProvider, "DwrfEncryptionProvider is null").toDwrfEncryptionProvider();
}
public IcebergFileWriter createFileWriter(
@@ -72,9 +107,10 @@ public IcebergFileWriter createFileWriter(
FileFormat fileFormat)
{
switch (fileFormat) {
- // TODO: support ORC
case PARQUET:
return createParquetWriter(outputPath, icebergSchema, jobConf, session, hdfsContext);
+ case ORC:
+ return createOrcWriter(outputPath, icebergSchema, jobConf, session);
}
throw new PrestoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat);
}
@@ -124,4 +160,81 @@ private IcebergFileWriter createParquetWriter(
throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
}
}
+
+ private IcebergFileWriter createOrcWriter(
+ Path outputPath,
+ Schema icebergSchema,
+ JobConf jobConf,
+ ConnectorSession session)
+ {
+ try {
+ FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
+ DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath)));
+ Callable<Void> rollbackAction = () -> {
+ hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false));
+ return null;
+ };
+
+ List<Types.NestedField> columnFields = icebergSchema.columns();
+ List<String> fileColumnNames = columnFields.stream()
+ .map(Types.NestedField::name)
+ .collect(toImmutableList());
+ List<Type> fileColumnTypes = columnFields.stream()
+ .map(Types.NestedField::type)
+ .map(type -> toPrestoType(type, typeManager))
+ .collect(toImmutableList());
+
+ Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
+ if (isOrcOptimizedWriterValidate(session)) {
+ validationInputFactory = Optional.of(() -> {
+ try {
+ return new HdfsOrcDataSource(
+ new OrcDataSourceId(outputPath.toString()),
+ hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()),
+ getOrcMaxMergeDistance(session),
+ getOrcMaxBufferSize(session),
+ getOrcStreamBufferSize(session),
+ false,
+ hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)),
+ readStats);
+ }
+ catch (IOException e) {
+ throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
+ }
+ });
+ }
+
+ return new IcebergOrcFileWriter(
+ icebergSchema,
+ orcDataSink,
+ rollbackAction,
+ ORC,
+ fileColumnNames,
+ fileColumnTypes,
+ toOrcType(icebergSchema),
+ getCompressionCodec(session).getOrcCompressionKind(),
+ orcFileWriterConfig
+ .toOrcWriterOptionsBuilder()
+ .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
+ .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
+ .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
+ .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
+ .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session))
+ .build(),
+ IntStream.range(0, fileColumnNames.size()).toArray(),
+ ImmutableMap.<String, String>builder()
+ .put(PRESTO_VERSION_NAME, nodeVersion.toString())
+ .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
+ .build(),
+ UTC,
+ validationInputFactory,
+ getOrcOptimizedWriterValidateMode(session),
+ orcWriterStats,
+ dwrfEncryptionProvider,
+ Optional.empty());
+ }
+ catch (IOException e) {
+ throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
+ }
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java
index 6aa1a6235e345..500c45d9757f9 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergMetadata.java
@@ -104,6 +104,7 @@
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static java.util.Collections.singletonList;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toList;
@@ -200,7 +201,7 @@ private Optional<SystemTable> getRawSystemTable(ConnectorSession session, Schema
return Optional.empty();
}
- org.apache.iceberg.Table table = getIcebergTable(metastore, hdfsEnvironment, session, tableName);
+ org.apache.iceberg.Table table = getIcebergTable(metastore, hdfsEnvironment, session, new SchemaTableName(tableName.getSchemaName(), name.getTableName()));
SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType());
switch (name.getTableType()) {
@@ -220,6 +221,8 @@ private Optional<SystemTable> getRawSystemTable(ConnectorSession session, Schema
return Optional.of(new PartitionTable(systemTableName, typeManager, table, getSnapshotId(table, name.getSnapshotId())));
case MANIFESTS:
return Optional.of(new ManifestsTable(systemTableName, table, getSnapshotId(table, name.getSnapshotId())));
+ case FILES:
+ return Optional.of(new FilesTable(systemTableName, table, getSnapshotId(table, name.getSnapshotId()), typeManager));
}
return Optional.empty();
}
@@ -261,7 +264,7 @@ public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTable
@Override
public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
- List<SchemaTableName> tables = listTables(session, Optional.of(prefix.getSchemaName()));
+ List<SchemaTableName> tables = prefix.getTableName() != null ? singletonList(prefix.toSchemaTableName()) : listTables(session, Optional.of(prefix.getSchemaName()));
ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder();
for (SchemaTableName table : tables) {
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java
index 4a42e1a9f50cf..2312dc68b25f4 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergModule.java
@@ -21,6 +21,7 @@
import com.facebook.presto.cache.NoOpCacheManager;
import com.facebook.presto.cache.filemerge.FileMergeCacheConfig;
import com.facebook.presto.cache.filemerge.FileMergeCacheManager;
+import com.facebook.presto.hive.CacheStatsMBean;
import com.facebook.presto.hive.DynamicConfigurationProvider;
import com.facebook.presto.hive.FileFormatDataSourceStats;
import com.facebook.presto.hive.ForCachingHiveMetastore;
@@ -29,9 +30,11 @@
import com.facebook.presto.hive.HdfsConfigurationInitializer;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.hive.HiveClientConfig;
+import com.facebook.presto.hive.HiveDwrfEncryptionProvider;
import com.facebook.presto.hive.HiveHdfsConfiguration;
import com.facebook.presto.hive.HiveNodePartitioningProvider;
import com.facebook.presto.hive.MetastoreClientConfig;
+import com.facebook.presto.hive.OrcFileWriterConfig;
import com.facebook.presto.hive.ParquetFileWriterConfig;
import com.facebook.presto.hive.PartitionMutator;
import com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration;
@@ -42,16 +45,32 @@
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.HivePartitionMutator;
import com.facebook.presto.hive.metastore.MetastoreConfig;
+import com.facebook.presto.orc.CachingStripeMetadataSource;
+import com.facebook.presto.orc.EncryptionLibrary;
+import com.facebook.presto.orc.OrcDataSourceId;
+import com.facebook.presto.orc.StorageStripeMetadataSource;
+import com.facebook.presto.orc.StripeMetadataSource;
+import com.facebook.presto.orc.StripeReader;
+import com.facebook.presto.orc.UnsupportedEncryptionLibrary;
+import com.facebook.presto.orc.cache.CachingOrcFileTailSource;
+import com.facebook.presto.orc.cache.OrcCacheConfig;
+import com.facebook.presto.orc.cache.OrcFileTailSource;
+import com.facebook.presto.orc.cache.StorageOrcFileTailSource;
+import com.facebook.presto.orc.metadata.OrcFileTail;
import com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider;
import com.facebook.presto.spi.connector.ConnectorPageSinkProvider;
import com.facebook.presto.spi.connector.ConnectorPageSourceProvider;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.procedure.Procedure;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
import com.google.inject.Binder;
import com.google.inject.Module;
import com.google.inject.Provides;
import com.google.inject.Scopes;
import com.google.inject.multibindings.Multibinder;
+import io.airlift.slice.Slice;
+import org.weakref.jmx.MBeanExporter;
import org.weakref.jmx.testing.TestingMBeanServer;
import javax.inject.Singleton;
@@ -64,13 +83,23 @@
import static com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder;
import static com.facebook.presto.cache.CacheType.FILE_MERGE;
import static com.google.inject.multibindings.Multibinder.newSetBinder;
+import static java.lang.Math.toIntExact;
import static java.util.concurrent.Executors.newFixedThreadPool;
import static java.util.concurrent.Executors.newScheduledThreadPool;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static org.weakref.jmx.ObjectNames.generatedNameOf;
import static org.weakref.jmx.guice.ExportBinder.newExporter;
public class IcebergModule
implements Module
{
+ private final String connectorId;
+
+ public IcebergModule(String connectorId)
+ {
+ this.connectorId = connectorId;
+ }
+
@Override
public void configure(Binder binder)
{
@@ -119,6 +148,14 @@ public void configure(Binder binder)
Multibinder<Procedure> procedures = newSetBinder(binder, Procedure.class);
procedures.addBinding().toProvider(RollbackToSnapshotProcedure.class).in(Scopes.SINGLETON);
+
+ // for orc
+ binder.bind(EncryptionLibrary.class).annotatedWith(HiveDwrfEncryptionProvider.ForCryptoService.class).to(UnsupportedEncryptionLibrary.class).in(Scopes.SINGLETON);
+ binder.bind(EncryptionLibrary.class).annotatedWith(HiveDwrfEncryptionProvider.ForUnknown.class).to(UnsupportedEncryptionLibrary.class).in(Scopes.SINGLETON);
+ binder.bind(HiveDwrfEncryptionProvider.class).in(Scopes.SINGLETON);
+
+ configBinder(binder).bindConfig(OrcCacheConfig.class, connectorId);
+ configBinder(binder).bindConfig(OrcFileWriterConfig.class);
}
@ForCachingHiveMetastore
@@ -146,4 +183,50 @@ public CacheManager createCacheManager(CacheConfig cacheConfig, FileMergeCacheCo
}
return new NoOpCacheManager();
}
+
+ @Singleton
+ @Provides
+ public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter)
+ {
+ OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource();
+ if (orcCacheConfig.isFileTailCacheEnabled()) {
+ Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder()
+ .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
+ .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
+ .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
+ .recordStats()
+ .build();
+ CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
+ orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
+ exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
+ }
+ return orcFileTailSource;
+ }
+
+ @Singleton
+ @Provides
+ public StripeMetadataSource createStripeMetadataSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter)
+ {
+ StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
+ if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
+ Cache<StripeReader.StripeId, Slice> footerCache = CacheBuilder.newBuilder()
+ .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
+ .weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize()))
+ .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
+ .recordStats()
+ .build();
+ Cache<StripeReader.StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder()
+ .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
+ .weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize()))
+ .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
+ .recordStats()
+ .build();
+ CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
+ CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
+ stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache);
+ exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
+ exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
+ }
+ return stripeMetadataSource;
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java
new file mode 100644
index 0000000000000..5ce39b6b90426
--- /dev/null
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcColumn.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.hive.HiveColumnHandle.ColumnType;
+import com.facebook.presto.orc.metadata.OrcType.OrcTypeKind;
+import com.google.common.collect.ImmutableMap;
+
+import java.util.Map;
+import java.util.Optional;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Describes one column of an ORC file as seen by the Iceberg connector: its
+ * ORC column id and field-type index, the Iceberg field id (when known), the
+ * column name, the Hive column type, the ORC type kind, and the ORC type
+ * attributes (which may carry the Iceberg-id key used to map ORC columns to
+ * Iceberg schema fields).
+ *
+ * Fix applied: generic type parameters that were stripped from this listing
+ * are restored (Optional<Integer> for the Iceberg id — it is populated from
+ * Integer.parseInt of the ORC attribute — and Map<String, String> for the ORC
+ * type attributes); fields that are never reassigned are now final.
+ */
+public class IcebergOrcColumn
+{
+    // ORC column id 0 is always the root (file-level) struct type.
+    public static final int ROOT_COLUMN_ID = 0;
+
+    private final int orcColumnId;
+    private final int orcFieldTypeIndex;
+    // Mutable: resolved later from the ORC type attributes, see setIcebergColumnId().
+    private Optional<Integer> icebergColumnId;
+    private String columnName;
+    private ColumnType columnType;
+    private OrcTypeKind orcType;
+    private final Map<String, String> attributes;
+
+    public IcebergOrcColumn(
+            int orcColumnId,
+            int orcFieldTypeIndex,
+            Optional<Integer> icebergColumnId,
+            String columnName,
+            ColumnType columnType,
+            OrcTypeKind orcType,
+            Map<String, String> attributes)
+    {
+        checkArgument(orcColumnId >= 0, "orcColumnId is negative");
+        checkArgument(orcFieldTypeIndex >= 0, "orcFieldTypeIndex is negative");
+        this.orcColumnId = orcColumnId;
+        this.orcFieldTypeIndex = orcFieldTypeIndex;
+        this.icebergColumnId = requireNonNull(icebergColumnId, "icebergColumnId is null");
+        this.columnName = requireNonNull(columnName, "columnName is null");
+        this.columnType = requireNonNull(columnType, "columnType is null");
+        this.orcType = requireNonNull(orcType, "orcType is null");
+        // Defensive copy: callers may hand in a mutable map.
+        this.attributes = ImmutableMap.copyOf(requireNonNull(attributes, "attributes is null"));
+    }
+
+    public int getOrcColumnId()
+    {
+        return orcColumnId;
+    }
+
+    public int getOrcFieldTypeIndex()
+    {
+        return orcFieldTypeIndex;
+    }
+
+    public Optional<Integer> getIcebergColumnId()
+    {
+        return icebergColumnId;
+    }
+
+    // Fluent setters return {@code this} to allow chaining, e.g. in copy().
+    public IcebergOrcColumn setIcebergColumnId(Optional<Integer> icebergColumnId)
+    {
+        this.icebergColumnId = requireNonNull(icebergColumnId, "icebergColumnId is null");
+        return this;
+    }
+
+    public String getColumnName()
+    {
+        return columnName;
+    }
+
+    public IcebergOrcColumn setColumnName(String columnName)
+    {
+        this.columnName = requireNonNull(columnName, "columnName is null");
+        return this;
+    }
+
+    public ColumnType getColumnType()
+    {
+        return columnType;
+    }
+
+    public IcebergOrcColumn setColumnType(ColumnType columnType)
+    {
+        this.columnType = requireNonNull(columnType, "columnType is null");
+        return this;
+    }
+
+    public OrcTypeKind getOrcType()
+    {
+        return orcType;
+    }
+
+    public IcebergOrcColumn setOrcType(OrcTypeKind orcType)
+    {
+        this.orcType = requireNonNull(orcType, "orcType is null");
+        return this;
+    }
+
+    public Map<String, String> getAttributes()
+    {
+        return attributes;
+    }
+
+    /**
+     * Returns a new instance with the same field values as {@code other};
+     * used before mutating a column via the fluent setters.
+     */
+    public static IcebergOrcColumn copy(IcebergOrcColumn other)
+    {
+        requireNonNull(other, "copy from other IcebergOrcColumn is null");
+        return new IcebergOrcColumn(
+                other.getOrcColumnId(),
+                other.getOrcFieldTypeIndex(),
+                other.getIcebergColumnId(),
+                other.getColumnName(),
+                other.getColumnType(),
+                other.getOrcType(),
+                other.getAttributes());
+    }
+
+    @Override
+    public String toString()
+    {
+        return "IcebergOrcColumn{" +
+                "orcColumnId=" + orcColumnId +
+                ", orcFieldTypeIndex=" + orcFieldTypeIndex +
+                ", icebergColumnId=" + icebergColumnId +
+                ", columnName='" + columnName + '\'' +
+                ", columnType=" + columnType +
+                ", orcType=" + orcType +
+                ", attributes=" + attributes +
+                '}';
+    }
+}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java
new file mode 100644
index 0000000000000..b0de928df3020
--- /dev/null
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOrcFileWriter.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.common.io.DataSink;
+import com.facebook.presto.common.type.Type;
+import com.facebook.presto.hive.OrcFileWriter;
+import com.facebook.presto.orc.DwrfEncryptionProvider;
+import com.facebook.presto.orc.DwrfWriterEncryption;
+import com.facebook.presto.orc.OrcDataSource;
+import com.facebook.presto.orc.OrcEncoding;
+import com.facebook.presto.orc.OrcWriteValidation;
+import com.facebook.presto.orc.OrcWriterOptions;
+import com.facebook.presto.orc.OrcWriterStats;
+import com.facebook.presto.orc.metadata.CompressionKind;
+import com.facebook.presto.orc.metadata.OrcType;
+import com.facebook.presto.orc.metadata.statistics.ColumnStatistics;
+import com.facebook.presto.orc.metadata.statistics.DateStatistics;
+import com.facebook.presto.orc.metadata.statistics.DecimalStatistics;
+import com.facebook.presto.orc.metadata.statistics.DoubleStatistics;
+import com.facebook.presto.orc.metadata.statistics.IntegerStatistics;
+import com.facebook.presto.orc.metadata.statistics.StringStatistics;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import io.airlift.slice.Slice;
+import org.apache.iceberg.Metrics;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.types.Conversions;
+import org.joda.time.DateTimeZone;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.function.Supplier;
+
+import static com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY;
+import static com.google.common.base.Verify.verify;
+import static java.lang.Math.toIntExact;
+import static java.util.Objects.requireNonNull;
+import static org.apache.iceberg.types.Types.DecimalType;
+import static org.apache.iceberg.types.Types.NestedField;
+
+/**
+ * ORC file writer for Iceberg tables: delegates the actual writing to the Hive
+ * OrcFileWriter superclass and additionally derives Iceberg Metrics (row count,
+ * per-column value/null counts and lower/upper bounds) from the ORC file
+ * statistics, so the written data file can be committed to the Iceberg table
+ * with proper metrics.
+ *
+ * NOTE(review): generic type parameters appear to have been stripped from this
+ * listing (e.g. "List orcColumn" is presumably a list of ORC types indexed by
+ * ORC column id) — confirm against the original sources.
+ */
+public class IcebergOrcFileWriter
+ extends OrcFileWriter
+ implements IcebergFileWriter
+{
+ private final Schema icebergSchema;
+ // Flattened ORC schema of the written file; list position = ORC column id.
+ private final List orcColumn;
+
+ public IcebergOrcFileWriter(
+ Schema icebergSchema,
+ DataSink dataSink,
+ Callable rollbackAction,
+ OrcEncoding orcEncoding,
+ List columnNames,
+ List fileColumnTypes,
+ List fileColumnOrcTypes,
+ CompressionKind compression,
+ OrcWriterOptions options,
+ int[] fileInputColumnIndexes,
+ Map metadata,
+ DateTimeZone hiveStorageTimeZone,
+ Optional> validationInputFactory,
+ OrcWriteValidation.OrcWriteValidationMode validationMode,
+ OrcWriterStats stats,
+ DwrfEncryptionProvider dwrfEncryptionProvider,
+ Optional dwrfWriterEncryption)
+ {
+ super(dataSink, rollbackAction, orcEncoding, columnNames, fileColumnTypes, Optional.ofNullable(fileColumnOrcTypes), compression, options, fileInputColumnIndexes, metadata, hiveStorageTimeZone, validationInputFactory, validationMode, stats, dwrfEncryptionProvider, dwrfWriterEncryption);
+ this.icebergSchema = requireNonNull(icebergSchema, "icebergSchema is null");
+ this.orcColumn = fileColumnOrcTypes;
+ }
+
+ @Override
+ public Metrics getMetrics()
+ {
+ // orcWriter is the underlying writer owned by the OrcFileWriter superclass.
+ return computeMetrics(icebergSchema, orcColumn, orcWriter.getFileRowCount(), orcWriter.getFileStats());
+ }
+
+ // Translates ORC file-level column statistics into Iceberg Metrics keyed by Iceberg field id.
+ private static Metrics computeMetrics(Schema icebergSchema, List orcRowTypes, long fileRowCount, List columnStatistics)
+ {
+ if (columnStatistics.isEmpty()) {
+ // No stats available in the file: report only the row count.
+ return new Metrics(fileRowCount, null, null, null, null, null);
+ }
+ // Columns that are descendants of LIST or MAP types are excluded because:
+ // 1. Their stats are not used by Apache Iceberg to filter out data files
+ // 2. Their record count can be larger than table-level row count. There's no good way to calculate nullCounts for them.
+ // See https://github.com/apache/iceberg/pull/199#discussion_r429443627
+ Set excludedColumns = getExcludedColumns(orcRowTypes);
+
+ ImmutableMap.Builder valueCountsBuilder = ImmutableMap.builder();
+ ImmutableMap.Builder nullCountsBuilder = ImmutableMap.builder();
+ ImmutableMap.Builder lowerBoundsBuilder = ImmutableMap.builder();
+ ImmutableMap.Builder upperBoundsBuilder = ImmutableMap.builder();
+
+ // OrcColumnId(0) is the root column that represents file-level schema
+ for (int i = 1; i < orcRowTypes.size(); i++) {
+ if (excludedColumns.contains(i)) {
+ continue;
+ }
+ OrcType orcColumn = orcRowTypes.get(i);
+ ColumnStatistics orcColumnStats = columnStatistics.get(i);
+ int icebergId = getIcebergId(orcColumn);
+ NestedField icebergField = icebergSchema.findField(icebergId);
+ verify(icebergField != null, "Cannot find Iceberg column with ID %s in schema %s", icebergId, icebergSchema);
+ valueCountsBuilder.put(icebergId, fileRowCount);
+ if (orcColumnStats.hasNumberOfValues()) {
+ // Null count = total rows minus the non-null values recorded by ORC.
+ nullCountsBuilder.put(icebergId, fileRowCount - orcColumnStats.getNumberOfValues());
+ }
+ toIcebergMinMax(orcColumnStats, icebergField.type()).ifPresent(minMax -> {
+ lowerBoundsBuilder.put(icebergId, minMax.getMin());
+ upperBoundsBuilder.put(icebergId, minMax.getMax());
+ });
+ }
+ Map valueCounts = valueCountsBuilder.build();
+ Map nullCounts = nullCountsBuilder.build();
+ Map lowerBounds = lowerBoundsBuilder.build();
+ Map upperBounds = upperBoundsBuilder.build();
+ // Iceberg's Metrics treats a null map as "unknown", so empty maps are passed as null.
+ return new Metrics(
+ fileRowCount,
+ null, // TODO: Add column size accounting to ORC column writers
+ valueCounts.isEmpty() ? null : valueCounts,
+ nullCounts.isEmpty() ? null : nullCounts,
+ lowerBounds.isEmpty() ? null : lowerBounds,
+ upperBounds.isEmpty() ? null : upperBounds);
+ }
+
+ // Returns the set of ORC column ids excluded from metrics (all descendants of LIST/MAP columns).
+ private static Set getExcludedColumns(List orcRowTypes)
+ {
+ ImmutableSet.Builder excludedColumns = ImmutableSet.builder();
+ populateExcludedColumns(orcRowTypes, 0, false, excludedColumns);
+ return excludedColumns.build();
+ }
+
+ // Depth-first walk over the ORC type tree; once inside a LIST or MAP, every descendant is excluded.
+ private static void populateExcludedColumns(List orcRowTypes, int orcColumnId, boolean exclude, ImmutableSet.Builder excludedColumns)
+ {
+ if (exclude) {
+ excludedColumns.add(orcColumnId);
+ }
+ OrcType orcColumn = orcRowTypes.get(orcColumnId);
+ switch (orcColumn.getOrcTypeKind()) {
+ case LIST:
+ case MAP:
+ for (Integer child : orcColumn.getFieldTypeIndexes()) {
+ populateExcludedColumns(orcRowTypes, child, true, excludedColumns);
+ }
+ return;
+ case STRUCT:
+ for (Integer child : orcColumn.getFieldTypeIndexes()) {
+ populateExcludedColumns(orcRowTypes, child, exclude, excludedColumns);
+ }
+ return;
+ }
+ }
+
+ // Reads the Iceberg field id stored in the ORC type attributes; fails if absent.
+ private static int getIcebergId(OrcType orcColumn)
+ {
+ String icebergId = orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY);
+ verify(icebergId != null, "ORC column %s doesn't have an associated Iceberg ID", orcColumn);
+ return Integer.parseInt(icebergId);
+ }
+
+ // Converts ORC min/max statistics into Iceberg-typed bounds; empty when the
+ // statistics kind is unsupported or either bound is missing.
+ private static Optional toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType)
+ {
+ IntegerStatistics integerStatistics = orcColumnStats.getIntegerStatistics();
+ if (integerStatistics != null) {
+ Object min = integerStatistics.getMin();
+ Object max = integerStatistics.getMax();
+ if (min == null || max == null) {
+ return Optional.empty();
+ }
+ if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) {
+ // ORC integer stats are longs; narrow (checked) for Iceberg INTEGER columns.
+ min = toIntExact((Long) min);
+ max = toIntExact((Long) max);
+ }
+ return Optional.of(new IcebergMinMax(icebergType, min, max));
+ }
+ DoubleStatistics doubleStatistics = orcColumnStats.getDoubleStatistics();
+ if (doubleStatistics != null) {
+ Object min = doubleStatistics.getMin();
+ Object max = doubleStatistics.getMax();
+ if (min == null || max == null) {
+ return Optional.empty();
+ }
+ if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT) {
+ // ORC stores FLOAT column stats as doubles; narrow to float for Iceberg.
+ min = ((Double) min).floatValue();
+ max = ((Double) max).floatValue();
+ }
+ return Optional.of(new IcebergMinMax(icebergType, min, max));
+ }
+ StringStatistics stringStatistics = orcColumnStats.getStringStatistics();
+ if (stringStatistics != null) {
+ Slice min = stringStatistics.getMin();
+ Slice max = stringStatistics.getMax();
+ if (min == null || max == null) {
+ return Optional.empty();
+ }
+ return Optional.of(new IcebergMinMax(icebergType, min.toStringUtf8(), max.toStringUtf8()));
+ }
+ DateStatistics dateStatistics = orcColumnStats.getDateStatistics();
+ if (dateStatistics != null) {
+ Integer min = dateStatistics.getMin();
+ Integer max = dateStatistics.getMax();
+ if (min == null || max == null) {
+ return Optional.empty();
+ }
+ return Optional.of(new IcebergMinMax(icebergType, min, max));
+ }
+ DecimalStatistics decimalStatistics = orcColumnStats.getDecimalStatistics();
+ if (decimalStatistics != null) {
+ BigDecimal min = decimalStatistics.getMin();
+ BigDecimal max = decimalStatistics.getMax();
+ if (min == null || max == null) {
+ return Optional.empty();
+ }
+ // Align the stats scale with the Iceberg decimal type's declared scale.
+ // NOTE(review): setScale without a RoundingMode throws ArithmeticException
+ // if rescaling would require rounding — confirm stats always match the type scale.
+ min = min.setScale(((DecimalType) icebergType).scale());
+ max = max.setScale(((DecimalType) icebergType).scale());
+ return Optional.of(new IcebergMinMax(icebergType, min, max));
+ }
+ return Optional.empty();
+ }
+
+ // Pair of bound values serialized with Iceberg's single-value binary encoding.
+ private static class IcebergMinMax
+ {
+ private ByteBuffer min;
+ private ByteBuffer max;
+
+ private IcebergMinMax(org.apache.iceberg.types.Type type, Object min, Object max)
+ {
+ this.min = Conversions.toByteBuffer(type, min);
+ this.max = Conversions.toByteBuffer(type, max);
+ }
+
+ public ByteBuffer getMin()
+ {
+ return min;
+ }
+
+ public ByteBuffer getMax()
+ {
+ return max;
+ }
+ }
+}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java
index ce0a75780b08c..ac25910b31e4d 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSourceProvider.java
@@ -13,16 +13,40 @@
*/
package com.facebook.presto.iceberg;
+import com.facebook.presto.common.RuntimeStats;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.common.type.Type;
+import com.facebook.presto.common.type.TypeManager;
+import com.facebook.presto.hive.EncryptionInformation;
import com.facebook.presto.hive.FileFormatDataSourceStats;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
+import com.facebook.presto.hive.HiveClientConfig;
+import com.facebook.presto.hive.HiveColumnHandle;
+import com.facebook.presto.hive.HiveDwrfEncryptionProvider;
+import com.facebook.presto.hive.HiveOrcAggregatedMemoryContext;
import com.facebook.presto.hive.filesystem.ExtendedFileSystem;
+import com.facebook.presto.hive.orc.HdfsOrcDataSource;
+import com.facebook.presto.hive.orc.OrcBatchPageSource;
+import com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider;
import com.facebook.presto.hive.parquet.ParquetPageSource;
import com.facebook.presto.memory.context.AggregatedMemoryContext;
+import com.facebook.presto.orc.DwrfEncryptionProvider;
+import com.facebook.presto.orc.DwrfKeyProvider;
+import com.facebook.presto.orc.OrcAggregatedMemoryContext;
+import com.facebook.presto.orc.OrcBatchRecordReader;
+import com.facebook.presto.orc.OrcDataSource;
+import com.facebook.presto.orc.OrcDataSourceId;
+import com.facebook.presto.orc.OrcEncoding;
+import com.facebook.presto.orc.OrcPredicate;
+import com.facebook.presto.orc.OrcReader;
+import com.facebook.presto.orc.OrcReaderOptions;
+import com.facebook.presto.orc.StripeMetadataSource;
+import com.facebook.presto.orc.TupleDomainOrcPredicate;
+import com.facebook.presto.orc.cache.OrcFileTailSource;
+import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.parquet.Field;
import com.facebook.presto.parquet.ParquetCorruptionException;
import com.facebook.presto.parquet.ParquetDataSource;
@@ -45,6 +69,8 @@
import io.airlift.units.DataSize;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.apache.iceberg.FileFormat;
@@ -64,7 +90,9 @@
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
+import java.util.stream.IntStream;
+import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;
import static com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT;
import static com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize;
import static com.facebook.presto.hive.HiveSessionProperties.isFailOnCorruptedParquetStatistics;
@@ -74,8 +102,22 @@
import static com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT;
+import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA;
+import static com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled;
+import static com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled;
+import static com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY;
+import static com.facebook.presto.iceberg.TypeConverter.toHiveType;
import static com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
+import static com.facebook.presto.orc.OrcEncoding.ORC;
+import static com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE;
import static com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO;
import static com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors;
import static com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName;
@@ -84,24 +126,42 @@
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static com.google.common.collect.Maps.uniqueIndex;
import static java.lang.String.format;
+import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.apache.parquet.io.ColumnIOConverter.constructField;
+import static org.joda.time.DateTimeZone.UTC;
public class IcebergPageSourceProvider
implements ConnectorPageSourceProvider
{
private final HdfsEnvironment hdfsEnvironment;
private final FileFormatDataSourceStats fileFormatDataSourceStats;
+ private final TypeManager typeManager;
+ private final OrcFileTailSource orcFileTailSource;
+ private final StripeMetadataSource stripeMetadataSource;
+ private final DwrfEncryptionProvider dwrfEncryptionProvider;
+ private final HiveClientConfig hiveClientConfig;
@Inject
public IcebergPageSourceProvider(
HdfsEnvironment hdfsEnvironment,
- FileFormatDataSourceStats fileFormatDataSourceStats)
+ FileFormatDataSourceStats fileFormatDataSourceStats,
+ TypeManager typeManager,
+ OrcFileTailSource orcFileTailSource,
+ StripeMetadataSource stripeMetadataSource,
+ HiveDwrfEncryptionProvider dwrfEncryptionProvider,
+ HiveClientConfig hiveClientConfig)
{
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null");
+ this.typeManager = requireNonNull(typeManager, "typeManager is null");
+ this.orcFileTailSource = requireNonNull(orcFileTailSource, "orcFileTailSource is null");
+ this.stripeMetadataSource = requireNonNull(stripeMetadataSource, "stripeMetadataSource is null");
+ this.dwrfEncryptionProvider = requireNonNull(dwrfEncryptionProvider, "DwrfEncryptionProvider is null").toDwrfEncryptionProvider();
+ this.hiveClientConfig = requireNonNull(hiveClientConfig, "hiveClientConfig is null");
}
@Override
@@ -139,7 +199,8 @@ public ConnectorPageSource createPageSource(
split.getFileFormat(),
table.getSchemaTableName(),
regularColumns,
- table.getPredicate());
+ table.getPredicate(),
+ splitContext.isCacheable());
return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource, session.getSqlFunctionProperties().getTimeZoneKey());
}
@@ -153,10 +214,10 @@ private ConnectorPageSource createDataPageSource(
FileFormat fileFormat,
SchemaTableName tableName,
List dataColumns,
- TupleDomain predicate)
+ TupleDomain predicate,
+ boolean isCacheable)
{
switch (fileFormat) {
- // TODO: support ORC for iceberg
case PARQUET:
return createParquetPageSource(
hdfsEnvironment,
@@ -174,6 +235,46 @@ private ConnectorPageSource createDataPageSource(
isParquetBatchReaderVerificationEnabled(session),
predicate,
fileFormatDataSourceStats);
+ case ORC:
+ FileStatus fileStatus = null;
+ try {
+ fileStatus = hdfsEnvironment.doAs(session.getUser(), () -> hdfsEnvironment.getFileSystem(hdfsContext, path).getFileStatus(path));
+ }
+ catch (IOException e) {
+ throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, e);
+ }
+ long fileSize = fileStatus.getLen();
+ OrcReaderOptions readerOptions = new OrcReaderOptions(
+ getOrcMaxMergeDistance(session),
+ getOrcTinyStripeThreshold(session),
+ getOrcMaxReadBlockSize(session),
+ isOrcZstdJniDecompressionEnabled(session));
+
+ // TODO: Implement EncryptionInformation in IcebergSplit instead of Optional.empty()
+ return createBatchOrcPageSource(
+ hdfsEnvironment,
+ session.getUser(),
+ hdfsEnvironment.getConfiguration(hdfsContext, path),
+ path,
+ start,
+ length,
+ fileSize,
+ isCacheable,
+ dataColumns,
+ typeManager,
+ predicate,
+ readerOptions,
+ ORC,
+ getOrcMaxBufferSize(session),
+ getOrcStreamBufferSize(session),
+ getOrcLazyReadSmallRanges(session),
+ isOrcBloomFiltersEnabled(session),
+ hiveClientConfig.getDomainCompactionThreshold(),
+ orcFileTailSource,
+ stripeMetadataSource,
+ fileFormatDataSourceStats,
+ Optional.empty(),
+ dwrfEncryptionProvider);
}
throw new PrestoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat);
}
@@ -312,4 +413,248 @@ private static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<IcebergColumnHandle> effectivePredicate)
+
+ private static ConnectorPageSource createBatchOrcPageSource(
+ HdfsEnvironment hdfsEnvironment,
+ String user,
+ Configuration configuration,
+ Path path,
+ long start,
+ long length,
+ long fileSize,
+ boolean isCacheable,
+ List<IcebergColumnHandle> regularColumns,
+ TypeManager typeManager,
+ TupleDomain effectivePredicate,
+ OrcReaderOptions options,
+ OrcEncoding orcEncoding,
+ DataSize maxBufferSize,
+ DataSize streamBufferSize,
+ boolean lazyReadSmallRanges,
+ boolean orcBloomFiltersEnabled,
+ int domainCompactionThreshold,
+ OrcFileTailSource orcFileTailSource,
+ StripeMetadataSource stripeMetadataSource,
+ FileFormatDataSourceStats stats,
+ Optional encryptionInformation,
+ DwrfEncryptionProvider dwrfEncryptionProvider)
+ {
+ OrcDataSource orcDataSource = null;
+ try {
+ FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
+ FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.open(path));
+ orcDataSource = new HdfsOrcDataSource(
+ new OrcDataSourceId(path.toString()),
+ fileSize,
+ options.getMaxMergeDistance(),
+ maxBufferSize,
+ streamBufferSize,
+ lazyReadSmallRanges,
+ inputStream,
+ stats);
+
+ // Todo: pass real columns to ProjectionBasedDwrfKeyProvider instead of ImmutableList.of()
+ DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, ImmutableList.of(), true, path);
+ OrcReader reader = new OrcReader(
+ orcDataSource,
+ orcEncoding,
+ orcFileTailSource,
+ stripeMetadataSource,
+ new HiveOrcAggregatedMemoryContext(),
+ options,
+ isCacheable,
+ dwrfEncryptionProvider,
+ dwrfKeyProvider);
+
+ List physicalColumnHandles = new ArrayList<>(regularColumns.size());
+ ImmutableMap.Builder includedColumns = ImmutableMap.builder();
+ ImmutableList.Builder> columnReferences = ImmutableList.builder();
+
+ List fileOrcColumns = getFileOrcColumns(reader);
+
+ Map fileOrcColumnByIcebergId = fileOrcColumns.stream()
+ .filter(orcColumn -> orcColumn.getAttributes().containsKey(ORC_ICEBERG_ID_KEY))
+ .collect(toImmutableMap(
+ orcColumn -> Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY)),
+ orcColumn -> IcebergOrcColumn.copy(orcColumn).setIcebergColumnId(Optional.of(Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY))))));
+
+ Map fileOrcColumnsByName = uniqueIndex(fileOrcColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
+
+ int nextMissingColumnIndex = fileOrcColumnsByName.size();
+ for (IcebergColumnHandle column : regularColumns) {
+ IcebergOrcColumn icebergOrcColumn;
+ boolean isExcludeColumn = false;
+
+ if (fileOrcColumnByIcebergId.isEmpty()) {
+ icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
+ }
+ else {
+ icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
+ if (icebergOrcColumn == null) {
+ // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name.
+ icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
+ if (icebergOrcColumn != null) {
+ isExcludeColumn = true;
+ }
+ }
+ }
+
+ if (icebergOrcColumn != null) {
+ HiveColumnHandle columnHandle = new HiveColumnHandle(
+ // Todo: using orc file column name
+ column.getName(),
+ toHiveType(column.getType()),
+ column.getType().getTypeSignature(),
+ icebergOrcColumn.getOrcColumnId(),
+ icebergOrcColumn.getColumnType(),
+ Optional.empty(),
+ Optional.empty());
+
+ physicalColumnHandles.add(columnHandle);
+ // Skip SchemaEvolution column
+ if (!isExcludeColumn) {
+ includedColumns.put(columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature()));
+ columnReferences.add(new TupleDomainOrcPredicate.ColumnReference<>(columnHandle, columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature())));
+ }
+ }
+ else {
+ physicalColumnHandles.add(new HiveColumnHandle(
+ column.getName(),
+ toHiveType(column.getType()),
+ column.getType().getTypeSignature(),
+ nextMissingColumnIndex++,
+ REGULAR,
+ Optional.empty(),
+ Optional.empty()));
+ }
+ }
+
+ TupleDomain hiveColumnHandleTupleDomain = effectivePredicate.transform(column -> {
+ IcebergOrcColumn icebergOrcColumn;
+ if (fileOrcColumnByIcebergId.isEmpty()) {
+ icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
+ }
+ else {
+ icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
+ if (icebergOrcColumn == null) {
+ // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name.
+ icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
+ }
+ }
+
+ return new HiveColumnHandle(
+ column.getName(),
+ toHiveType(column.getType()),
+ column.getType().getTypeSignature(),
+ // Note: the HiveColumnHandle.hiveColumnIndex starts from '0' while the IcebergColumnHandle.id starts from '1'
+ icebergOrcColumn != null ? icebergOrcColumn.getOrcColumnId() : column.getId() - 1,
+ icebergOrcColumn != null ? icebergOrcColumn.getColumnType() : REGULAR,
+ Optional.empty(),
+ Optional.empty());
+ });
+
+ OrcPredicate predicate = new TupleDomainOrcPredicate<>(hiveColumnHandleTupleDomain, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
+
+ OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
+ OrcBatchRecordReader recordReader = reader.createBatchRecordReader(
+ includedColumns.build(),
+ predicate,
+ start,
+ length,
+ UTC,
+ systemMemoryUsage,
+ INITIAL_BATCH_SIZE);
+
+ return new OrcBatchPageSource(
+ recordReader,
+ orcDataSource,
+ physicalColumnHandles,
+ typeManager,
+ systemMemoryUsage,
+ stats,
+ new RuntimeStats());
+ }
+ catch (Exception e) {
+ if (orcDataSource != null) {
+ try {
+ orcDataSource.close();
+ }
+ catch (IOException ignored) {
+ }
+ }
+ if (e instanceof PrestoException) {
+ throw (PrestoException) e;
+ }
+ String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
+ if (e instanceof BlockMissingException) {
+ throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
+ }
+ throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
+ }
+ }
+
+    /**
+     * Flattens the root ORC type from the file footer into the connector's
+     * column model. For a STRUCT root (the normal table layout) one column per
+     * field is returned; LIST, MAP and UNION roots get synthetic names
+     * ("item", "key"/"value", "field<N>"). Any other root kind yields an
+     * empty list.
+     *
+     * Fix applied: generic type parameters stripped from this listing are
+     * restored (List<IcebergOrcColumn> return type, List<OrcType> footer types).
+     */
+    private static List<IcebergOrcColumn> getFileOrcColumns(OrcReader reader)
+    {
+        List<OrcType> orcTypes = reader.getFooter().getTypes();
+        OrcType rootOrcType = orcTypes.get(ROOT_COLUMN_ID);
+
+        List<IcebergOrcColumn> columnAttributes = ImmutableList.of();
+        if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.STRUCT) {
+            columnAttributes = IntStream.range(0, rootOrcType.getFieldCount())
+                    .mapToObj(fieldId -> new IcebergOrcColumn(
+                            fieldId,
+                            rootOrcType.getFieldTypeIndex(fieldId),
+                            // We will filter out iceberg column by 'ORC_ICEBERG_ID_KEY' later,
+                            // so we use 'Optional.empty()' temporarily.
+                            Optional.empty(),
+                            rootOrcType.getFieldName(fieldId),
+                            REGULAR,
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(),
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes()))
+                    .collect(toImmutableList());
+        }
+        else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.LIST) {
+            columnAttributes = ImmutableList.of(
+                    new IcebergOrcColumn(
+                            0,
+                            rootOrcType.getFieldTypeIndex(0),
+                            Optional.empty(),
+                            "item",
+                            REGULAR,
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(),
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes()));
+        }
+        else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.MAP) {
+            columnAttributes = ImmutableList.of(
+                    new IcebergOrcColumn(
+                            0,
+                            rootOrcType.getFieldTypeIndex(0),
+                            Optional.empty(),
+                            "key",
+                            REGULAR,
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(),
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes()),
+                    new IcebergOrcColumn(
+                            1,
+                            rootOrcType.getFieldTypeIndex(1),
+                            Optional.empty(),
+                            "value",
+                            REGULAR,
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getOrcTypeKind(),
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getAttributes()));
+        }
+        else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.UNION) {
+            columnAttributes = IntStream.range(0, rootOrcType.getFieldCount())
+                    .mapToObj(fieldId -> new IcebergOrcColumn(
+                            fieldId,
+                            rootOrcType.getFieldTypeIndex(fieldId),
+                            Optional.empty(),
+                            "field" + fieldId,
+                            REGULAR,
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(),
+                            orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes()))
+                    .collect(toImmutableList());
+        }
+        return columnAttributes;
+    }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java
index 58230dc93ab43..f5d2bf76ae656 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSessionProperties.java
@@ -15,8 +15,11 @@
import com.facebook.presto.hive.HiveClientConfig;
import com.facebook.presto.hive.HiveCompressionCodec;
+import com.facebook.presto.hive.OrcFileWriterConfig;
import com.facebook.presto.hive.ParquetFileWriterConfig;
+import com.facebook.presto.orc.OrcWriteValidation;
import com.facebook.presto.spi.ConnectorSession;
+import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.session.PropertyMetadata;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
@@ -24,10 +27,18 @@
import javax.inject.Inject;
import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
+import static com.facebook.presto.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
import static com.facebook.presto.spi.session.PropertyMetadata.booleanProperty;
+import static com.facebook.presto.spi.session.PropertyMetadata.integerProperty;
+import static com.facebook.presto.spi.session.PropertyMetadata.stringProperty;
+import static com.google.common.base.Preconditions.checkArgument;
+import static java.lang.String.format;
+import static java.util.Locale.ENGLISH;
public final class IcebergSessionProperties
{
@@ -39,13 +50,32 @@ public final class IcebergSessionProperties
private static final String PARQUET_USE_COLUMN_NAMES = "parquet_use_column_names";
private static final String PARQUET_BATCH_READ_OPTIMIZATION_ENABLED = "parquet_batch_read_optimization_enabled";
private static final String PARQUET_BATCH_READER_VERIFICATION_ENABLED = "parquet_batch_reader_verification_enabled";
+ private static final String ORC_BLOOM_FILTERS_ENABLED = "orc_bloom_filters_enabled";
+ private static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance";
+ private static final String ORC_MAX_BUFFER_SIZE = "orc_max_buffer_size";
+ private static final String ORC_STREAM_BUFFER_SIZE = "orc_stream_buffer_size";
+ private static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold";
+ private static final String ORC_MAX_READ_BLOCK_SIZE = "orc_max_read_block_size";
+ private static final String ORC_LAZY_READ_SMALL_RANGES = "orc_lazy_read_small_ranges";
+ private static final String ORC_ZSTD_JNI_DECOMPRESSION_ENABLED = "orc_zstd_jni_decompression_enabled";
+ private static final String ORC_STRING_STATISTICS_LIMIT = "orc_string_statistics_limit";
+ private static final String ORC_OPTIMIZED_WRITER_ENABLED = "orc_optimized_writer_enabled";
+ private static final String ORC_OPTIMIZED_WRITER_VALIDATE = "orc_optimized_writer_validate";
+ private static final String ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE = "orc_optimized_writer_validate_percentage";
+ private static final String ORC_OPTIMIZED_WRITER_VALIDATE_MODE = "orc_optimized_writer_validate_mode";
+ private static final String ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE = "orc_optimized_writer_min_stripe_size";
+ private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE = "orc_optimized_writer_max_stripe_size";
+ private static final String ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS = "orc_optimized_writer_max_stripe_rows";
+ private static final String ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY = "orc_optimized_writer_max_dictionary_memory";
+ private static final String ORC_COMPRESSION_CODEC = "orc_compression_codec";
private final List<PropertyMetadata<?>> sessionProperties;
@Inject
public IcebergSessionProperties(
IcebergConfig icebergConfig,
HiveClientConfig hiveClientConfig,
- ParquetFileWriterConfig parquetFileWriterConfig)
+ ParquetFileWriterConfig parquetFileWriterConfig,
+ OrcFileWriterConfig orcFileWriterConfig)
{
sessionProperties = ImmutableList.of(
new PropertyMetadata<>(
@@ -91,7 +121,113 @@ public IcebergSessionProperties(
PARQUET_WRITER_PAGE_SIZE,
"Parquet: Writer page size",
parquetFileWriterConfig.getPageSize(),
- false));
+ false),
+ booleanProperty(
+ ORC_BLOOM_FILTERS_ENABLED,
+ "ORC: Enable bloom filters for predicate pushdown",
+ hiveClientConfig.isOrcBloomFiltersEnabled(),
+ false),
+ dataSizeSessionProperty(
+ ORC_MAX_MERGE_DISTANCE,
+ "ORC: Maximum size of gap between two reads to merge into a single read",
+ hiveClientConfig.getOrcMaxMergeDistance(),
+ false),
+ dataSizeSessionProperty(
+ ORC_MAX_BUFFER_SIZE,
+ "ORC: Maximum size of a single read",
+ hiveClientConfig.getOrcMaxBufferSize(),
+ false),
+ dataSizeSessionProperty(
+ ORC_STREAM_BUFFER_SIZE,
+ "ORC: Size of buffer for streaming reads",
+ hiveClientConfig.getOrcStreamBufferSize(),
+ false),
+ dataSizeSessionProperty(
+ ORC_TINY_STRIPE_THRESHOLD,
+ "ORC: Threshold below which an ORC stripe or file will read in its entirety",
+ hiveClientConfig.getOrcTinyStripeThreshold(),
+ false),
+ dataSizeSessionProperty(
+ ORC_MAX_READ_BLOCK_SIZE,
+ "ORC: Soft max size of Presto blocks produced by ORC reader",
+ hiveClientConfig.getOrcMaxReadBlockSize(),
+ false),
+ booleanProperty(
+ ORC_LAZY_READ_SMALL_RANGES,
+ "Experimental: ORC: Read small file segments lazily",
+ hiveClientConfig.isOrcLazyReadSmallRanges(),
+ false),
+ booleanProperty(
+ ORC_ZSTD_JNI_DECOMPRESSION_ENABLED,
+ "use JNI based zstd decompression for reading ORC files",
+ hiveClientConfig.isZstdJniDecompressionEnabled(),
+ true),
+ dataSizeSessionProperty(
+ ORC_STRING_STATISTICS_LIMIT,
+ "ORC: Maximum size of string statistics; drop if exceeding",
+ orcFileWriterConfig.getStringStatisticsLimit(),
+ false),
+ booleanProperty(
+ ORC_OPTIMIZED_WRITER_ENABLED,
+ "Experimental: ORC: Enable optimized writer",
+ hiveClientConfig.isOrcOptimizedWriterEnabled(),
+ false),
+ booleanProperty(
+ ORC_OPTIMIZED_WRITER_VALIDATE,
+ "Experimental: ORC: Force all validation for files",
+ hiveClientConfig.getOrcWriterValidationPercentage() > 0.0,
+ false),
+ new PropertyMetadata<>(
+ ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE,
+ "Experimental: ORC: sample percentage for validation for files",
+ DOUBLE,
+ Double.class,
+ hiveClientConfig.getOrcWriterValidationPercentage(),
+ false,
+ value -> {
+ double doubleValue = ((Number) value).doubleValue();
+ if (doubleValue < 0.0 || doubleValue > 100.0) {
+ throw new PrestoException(
+ INVALID_SESSION_PROPERTY,
+ format("%s must be between 0.0 and 100.0 inclusive: %s", ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, doubleValue));
+ }
+ return doubleValue;
+ },
+ value -> value),
+ stringProperty(
+ ORC_OPTIMIZED_WRITER_VALIDATE_MODE,
+ "Experimental: ORC: Level of detail in ORC validation",
+ hiveClientConfig.getOrcWriterValidationMode().toString(),
+ false),
+ dataSizeSessionProperty(
+ ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE,
+ "Experimental: ORC: Min stripe size",
+ orcFileWriterConfig.getStripeMinSize(),
+ false),
+ dataSizeSessionProperty(
+ ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE,
+ "Experimental: ORC: Max stripe size",
+ orcFileWriterConfig.getStripeMaxSize(),
+ false),
+ integerProperty(
+ ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS,
+ "Experimental: ORC: Max stripe row count",
+ orcFileWriterConfig.getStripeMaxRowCount(),
+ false),
+ dataSizeSessionProperty(
+ ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY,
+ "Experimental: ORC: Max dictionary memory",
+ orcFileWriterConfig.getDictionaryMaxMemory(),
+ false),
+ new PropertyMetadata<>(
+ ORC_COMPRESSION_CODEC,
+ "The preferred compression codec to use when writing ORC and DWRF files",
+ VARCHAR,
+ HiveCompressionCodec.class,
+ hiveClientConfig.getOrcCompressionCodec(),
+ false,
+ value -> HiveCompressionCodec.valueOf(((String) value).toUpperCase()),
+ HiveCompressionCodec::name));
}
public List<PropertyMetadata<?>> getSessionProperties()
@@ -136,4 +272,101 @@ public static PropertyMetadata dataSizeSessionProperty(String name, St
value -> DataSize.valueOf((String) value),
DataSize::toString);
}
+
+ public static boolean isOrcBloomFiltersEnabled(ConnectorSession session)
+ {
+ return session.getProperty(ORC_BLOOM_FILTERS_ENABLED, Boolean.class);
+ }
+
+ public static DataSize getOrcMaxMergeDistance(ConnectorSession session)
+ {
+ return session.getProperty(ORC_MAX_MERGE_DISTANCE, DataSize.class);
+ }
+
+ public static DataSize getOrcMaxBufferSize(ConnectorSession session)
+ {
+ return session.getProperty(ORC_MAX_BUFFER_SIZE, DataSize.class);
+ }
+
+ public static DataSize getOrcStreamBufferSize(ConnectorSession session)
+ {
+ return session.getProperty(ORC_STREAM_BUFFER_SIZE, DataSize.class);
+ }
+
+ public static DataSize getOrcTinyStripeThreshold(ConnectorSession session)
+ {
+ return session.getProperty(ORC_TINY_STRIPE_THRESHOLD, DataSize.class);
+ }
+
+ public static DataSize getOrcMaxReadBlockSize(ConnectorSession session)
+ {
+ return session.getProperty(ORC_MAX_READ_BLOCK_SIZE, DataSize.class);
+ }
+
+ public static boolean getOrcLazyReadSmallRanges(ConnectorSession session)
+ {
+ return session.getProperty(ORC_LAZY_READ_SMALL_RANGES, Boolean.class);
+ }
+
+ public static boolean isOrcZstdJniDecompressionEnabled(ConnectorSession session)
+ {
+ return session.getProperty(ORC_ZSTD_JNI_DECOMPRESSION_ENABLED, Boolean.class);
+ }
+
+ public static DataSize getOrcStringStatisticsLimit(ConnectorSession session)
+ {
+ return session.getProperty(ORC_STRING_STATISTICS_LIMIT, DataSize.class);
+ }
+
+ public static boolean isOrcOptimizedWriterEnabled(ConnectorSession session)
+ {
+ return session.getProperty(ORC_OPTIMIZED_WRITER_ENABLED, Boolean.class);
+ }
+
+ public static boolean isOrcOptimizedWriterValidate(ConnectorSession session)
+ {
+ boolean validate = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE, Boolean.class);
+ double percentage = session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_PERCENTAGE, Double.class);
+
+ checkArgument(percentage >= 0.0 && percentage <= 100.0);
+
+ // session property can disable validation
+ if (!validate) {
+ return false;
+ }
+
+ // session property can not force validation when sampling is enabled
+ // todo change this if session properties support null
+ return ThreadLocalRandom.current().nextDouble(100) < percentage;
+ }
+
+ public static OrcWriteValidation.OrcWriteValidationMode getOrcOptimizedWriterValidateMode(ConnectorSession session)
+ {
+ return OrcWriteValidation.OrcWriteValidationMode.valueOf(session.getProperty(ORC_OPTIMIZED_WRITER_VALIDATE_MODE, String.class).toUpperCase(ENGLISH));
+ }
+
+ public static DataSize getOrcOptimizedWriterMinStripeSize(ConnectorSession session)
+ {
+ return session.getProperty(ORC_OPTIMIZED_WRITER_MIN_STRIPE_SIZE, DataSize.class);
+ }
+
+ public static DataSize getOrcOptimizedWriterMaxStripeSize(ConnectorSession session)
+ {
+ return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_SIZE, DataSize.class);
+ }
+
+ public static int getOrcOptimizedWriterMaxStripeRows(ConnectorSession session)
+ {
+ return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_STRIPE_ROWS, Integer.class);
+ }
+
+ public static DataSize getOrcOptimizedWriterMaxDictionaryMemory(ConnectorSession session)
+ {
+ return session.getProperty(ORC_OPTIMIZED_WRITER_MAX_DICTIONARY_MEMORY, DataSize.class);
+ }
+
+ public static HiveCompressionCodec getOrcCompressionCodec(ConnectorSession session)
+ {
+ return session.getProperty(ORC_COMPRESSION_CODEC, HiveCompressionCodec.class);
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java
index 633cdca03cd80..6dc2499066ca0 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergUtil.java
@@ -13,6 +13,7 @@
*/
package com.facebook.presto.iceberg;
+import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
@@ -30,6 +31,8 @@
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.TableScan;
+import org.apache.iceberg.expressions.Expression;
import java.util.List;
import java.util.Locale;
@@ -42,6 +45,7 @@
import static com.facebook.presto.iceberg.TypeConverter.toPrestoType;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.Lists.reverse;
+import static com.google.common.collect.Streams.stream;
import static java.lang.String.format;
import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE;
import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP;
@@ -141,4 +145,19 @@ private static String quotedName(String name)
}
return '"' + name.replace("\"", "\"\"") + '"';
}
+
+ public static TableScan getTableScan(TupleDomain<IcebergColumnHandle> predicates, Optional<Long> snapshotId, Table icebergTable)
+ {
+ Expression expression = ExpressionConverter.toIcebergExpression(predicates);
+ TableScan tableScan = icebergTable.newScan().filter(expression);
+ return snapshotId
+ .map(id -> isSnapshot(icebergTable, id) ? tableScan.useSnapshot(id) : tableScan.asOfTime(id))
+ .orElse(tableScan);
+ }
+
+ private static boolean isSnapshot(Table icebergTable, Long id)
+ {
+ return stream(icebergTable.snapshots())
+ .anyMatch(snapshot -> snapshot.snapshotId() == id);
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java
index b850cbd996118..725e22ac2c9e0 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java
@@ -17,8 +17,8 @@
import com.facebook.airlift.bootstrap.LifeCycleManager;
import com.facebook.airlift.event.client.EventModule;
import com.facebook.airlift.json.JsonModule;
-import com.facebook.presto.client.NodeVersion;
import com.facebook.presto.common.type.TypeManager;
+import com.facebook.presto.hive.NodeVersion;
import com.facebook.presto.hive.authentication.HiveAuthenticationModule;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.HiveMetastoreModule;
@@ -60,7 +60,7 @@ public static Connector createConnector(String catalogName, Map
new EventModule(),
new MBeanModule(),
new JsonModule(),
- new IcebergModule(),
+ new IcebergModule(catalogName),
new IcebergMetastoreModule(),
new HiveS3Module(catalogName),
new HiveAuthenticationModule(),
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java
index ad6aabf3370f9..295b835607ee2 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/ManifestsTable.java
@@ -14,7 +14,10 @@
package com.facebook.presto.iceberg;
import com.facebook.presto.common.Page;
+import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.predicate.TupleDomain;
+import com.facebook.presto.common.type.ArrayType;
+import com.facebook.presto.common.type.RowType;
import com.facebook.presto.iceberg.util.PageListBuilder;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorPageSource;
@@ -26,19 +29,25 @@
import com.facebook.presto.spi.SystemTable;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.google.common.collect.ImmutableList;
+import org.apache.iceberg.ManifestFile;
+import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Type;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA;
import static java.lang.String.format;
+import static java.util.Arrays.asList;
import static java.util.Objects.requireNonNull;
public class ManifestsTable
@@ -62,6 +71,10 @@ public ManifestsTable(SchemaTableName tableName, Table icebergTable, Optional buildPages(ConnectorTableMetadata tableMetadata, Table
pagesBuilder.appendInteger(file.addedFilesCount());
pagesBuilder.appendInteger(file.existingFilesCount());
pagesBuilder.appendInteger(file.deletedFilesCount());
+ writePartitionSummaries(pagesBuilder.nextColumn(), file.partitions(), partitionSpecsById.get(file.partitionSpecId()));
pagesBuilder.endRow();
});
return pagesBuilder.build();
}
+
+ private static void writePartitionSummaries(BlockBuilder arrayBlockBuilder, List<ManifestFile.PartitionFieldSummary> summaries, PartitionSpec partitionSpec)
+ {
+ BlockBuilder singleArrayWriter = arrayBlockBuilder.beginBlockEntry();
+ for (int i = 0; i < summaries.size(); i++) {
+ ManifestFile.PartitionFieldSummary summary = summaries.get(i);
+ PartitionField field = partitionSpec.fields().get(i);
+ Type nestedType = partitionSpec.partitionType().fields().get(i).type();
+
+ BlockBuilder rowBuilder = singleArrayWriter.beginBlockEntry();
+ BOOLEAN.writeBoolean(rowBuilder, summary.containsNull());
+ VARCHAR.writeString(rowBuilder, field.transform().toHumanString(
+ Conversions.fromByteBuffer(nestedType, summary.lowerBound())));
+ VARCHAR.writeString(rowBuilder, field.transform().toHumanString(
+ Conversions.fromByteBuffer(nestedType, summary.upperBound())));
+ singleArrayWriter.closeEntry();
+ }
+ arrayBlockBuilder.closeEntry();
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java
index 322ea0e690bec..6e10dd6b2cf8d 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/SnapshotsTable.java
@@ -15,7 +15,9 @@
import com.facebook.presto.common.Page;
import com.facebook.presto.common.predicate.TupleDomain;
+import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.common.type.TypeManager;
+import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.iceberg.util.PageListBuilder;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorPageSource;
@@ -53,6 +55,9 @@ public SnapshotsTable(SchemaTableName tableName, TypeManager typeManager, Table
.add(new ColumnMetadata("parent_id", BIGINT))
.add(new ColumnMetadata("operation", VARCHAR))
.add(new ColumnMetadata("manifest_list", VARCHAR))
+ .add(new ColumnMetadata("summary", typeManager.getParameterizedType(StandardTypes.MAP, ImmutableList.of(
+ TypeSignatureParameter.of(VARCHAR.getTypeSignature()),
+ TypeSignatureParameter.of(VARCHAR.getTypeSignature())))))
.build());
}
@@ -91,6 +96,9 @@ private static List buildPages(ConnectorTableMetadata tableMetadata, Conne
if (checkNonNull(snapshot.manifestListLocation(), pagesBuilder)) {
pagesBuilder.appendVarchar(snapshot.manifestListLocation());
}
+ if (checkNonNull(snapshot.summary(), pagesBuilder)) {
+ pagesBuilder.appendVarcharVarcharMap(snapshot.summary());
+ }
pagesBuilder.endRow();
});
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java
index c00c5b86de33b..8058b224f49b4 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TypeConverter.java
@@ -16,11 +16,13 @@
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.BigintType;
import com.facebook.presto.common.type.BooleanType;
+import com.facebook.presto.common.type.CharType;
import com.facebook.presto.common.type.DateType;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.DoubleType;
import com.facebook.presto.common.type.IntegerType;
import com.facebook.presto.common.type.MapType;
+import com.facebook.presto.common.type.NamedTypeSignature;
import com.facebook.presto.common.type.RealType;
import com.facebook.presto.common.type.RowType;
import com.facebook.presto.common.type.StandardTypes;
@@ -33,17 +35,57 @@
import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
+import com.facebook.presto.hive.HiveType;
+import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
+import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
+import static com.facebook.presto.common.type.DateType.DATE;
+import static com.facebook.presto.common.type.DoubleType.DOUBLE;
+import static com.facebook.presto.common.type.IntegerType.INTEGER;
+import static com.facebook.presto.common.type.RealType.REAL;
+import static com.facebook.presto.common.type.SmallintType.SMALLINT;
+import static com.facebook.presto.common.type.TimestampType.TIMESTAMP;
+import static com.facebook.presto.common.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE;
+import static com.facebook.presto.common.type.TinyintType.TINYINT;
+import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
+import static com.facebook.presto.hive.HiveType.HIVE_BINARY;
+import static com.facebook.presto.hive.HiveType.HIVE_BOOLEAN;
+import static com.facebook.presto.hive.HiveType.HIVE_BYTE;
+import static com.facebook.presto.hive.HiveType.HIVE_DATE;
+import static com.facebook.presto.hive.HiveType.HIVE_DOUBLE;
+import static com.facebook.presto.hive.HiveType.HIVE_FLOAT;
+import static com.facebook.presto.hive.HiveType.HIVE_INT;
+import static com.facebook.presto.hive.HiveType.HIVE_LONG;
+import static com.facebook.presto.hive.HiveType.HIVE_SHORT;
+import static com.facebook.presto.hive.HiveType.HIVE_STRING;
+import static com.facebook.presto.hive.HiveType.HIVE_TIMESTAMP;
+import static com.facebook.presto.hive.metastore.MetastoreUtil.isArrayType;
+import static com.facebook.presto.hive.metastore.MetastoreUtil.isMapType;
+import static com.facebook.presto.hive.metastore.MetastoreUtil.isRowType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.lang.String.format;
+import static java.util.stream.Collectors.toList;
+import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo;
+import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getListTypeInfo;
+import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getMapTypeInfo;
+import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo;
+import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo;
public final class TypeConverter
{
@@ -144,6 +186,11 @@ public static org.apache.iceberg.types.Type toIcebergType(Type type)
throw new PrestoException(NOT_SUPPORTED, "Type not supported for Iceberg: " + type.getDisplayName());
}
+ public static HiveType toHiveType(Type type)
+ {
+ return HiveType.toHiveType(toHiveTypeInfo(type));
+ }
+
private static org.apache.iceberg.types.Type fromDecimal(DecimalType type)
{
return Types.DecimalType.of(type.getPrecision(), type.getScale());
@@ -169,4 +216,223 @@ private static org.apache.iceberg.types.Type fromMap(MapType type)
{
return Types.MapType.ofOptional(1, 2, toIcebergType(type.getKeyType()), toIcebergType(type.getValueType()));
}
+
+ private static TypeInfo toHiveTypeInfo(Type type)
+ {
+ if (BOOLEAN.equals(type)) {
+ return HIVE_BOOLEAN.getTypeInfo();
+ }
+ if (BIGINT.equals(type)) {
+ return HIVE_LONG.getTypeInfo();
+ }
+ if (INTEGER.equals(type)) {
+ return HIVE_INT.getTypeInfo();
+ }
+ if (SMALLINT.equals(type)) {
+ return HIVE_SHORT.getTypeInfo();
+ }
+ if (TINYINT.equals(type)) {
+ return HIVE_BYTE.getTypeInfo();
+ }
+ if (REAL.equals(type)) {
+ return HIVE_FLOAT.getTypeInfo();
+ }
+ if (DOUBLE.equals(type)) {
+ return HIVE_DOUBLE.getTypeInfo();
+ }
+ if (type instanceof VarcharType) {
+ VarcharType varcharType = (VarcharType) type;
+ if (varcharType.isUnbounded()) {
+ return HIVE_STRING.getTypeInfo();
+ }
+ if (varcharType.getLengthSafe() <= HiveVarchar.MAX_VARCHAR_LENGTH) {
+ return getVarcharTypeInfo(varcharType.getLengthSafe());
+ }
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s. Supported VARCHAR types: VARCHAR(<=%d), VARCHAR.", type, HiveVarchar.MAX_VARCHAR_LENGTH));
+ }
+ if (type instanceof CharType) {
+ CharType charType = (CharType) type;
+ int charLength = charType.getLength();
+ if (charLength <= HiveChar.MAX_CHAR_LENGTH) {
+ return getCharTypeInfo(charLength);
+ }
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s. Supported CHAR types: CHAR(<=%d).",
+ type, HiveChar.MAX_CHAR_LENGTH));
+ }
+ if (VARBINARY.equals(type)) {
+ return HIVE_BINARY.getTypeInfo();
+ }
+ if (DATE.equals(type)) {
+ return HIVE_DATE.getTypeInfo();
+ }
+ if (TIMESTAMP.equals(type)) {
+ return HIVE_TIMESTAMP.getTypeInfo();
+ }
+ if (TIMESTAMP_WITH_TIME_ZONE.equals(type)) {
+ // Hive does not have TIMESTAMP_WITH_TIME_ZONE; this is just a workaround for Iceberg.
+ return HIVE_TIMESTAMP.getTypeInfo();
+ }
+ if (type instanceof DecimalType) {
+ DecimalType decimalType = (DecimalType) type;
+ return new DecimalTypeInfo(decimalType.getPrecision(), decimalType.getScale());
+ }
+ if (isArrayType(type)) {
+ TypeInfo elementType = toHiveTypeInfo(type.getTypeParameters().get(0));
+ return getListTypeInfo(elementType);
+ }
+ if (isMapType(type)) {
+ TypeInfo keyType = toHiveTypeInfo(type.getTypeParameters().get(0));
+ TypeInfo valueType = toHiveTypeInfo(type.getTypeParameters().get(1));
+ return getMapTypeInfo(keyType, valueType);
+ }
+ if (isRowType(type)) {
+ ImmutableList.Builder<String> fieldNames = ImmutableList.builder();
+ for (TypeSignatureParameter parameter : type.getTypeSignature().getParameters()) {
+ if (!parameter.isNamedTypeSignature()) {
+ throw new IllegalArgumentException(format("Expected all parameters to be named type, but got %s", parameter));
+ }
+ NamedTypeSignature namedTypeSignature = parameter.getNamedTypeSignature();
+ if (!namedTypeSignature.getName().isPresent()) {
+ throw new PrestoException(NOT_SUPPORTED, format("Anonymous row type is not supported in Hive. Please give each field a name: %s", type));
+ }
+ fieldNames.add(namedTypeSignature.getName().get());
+ }
+ return getStructTypeInfo(
+ fieldNames.build(),
+ type.getTypeParameters().stream()
+ .map(TypeConverter::toHiveTypeInfo)
+ .collect(toList()));
+ }
+ throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", type));
+ }
+
+ public static List<OrcType> toOrcType(Schema schema)
+ {
+ return toOrcStructType(0, schema.asStruct(), ImmutableMap.of());
+ }
+
+ private static List<OrcType> toOrcType(int nextFieldTypeIndex, org.apache.iceberg.types.Type type, Map<String, String> attributes)
+ {
+ switch (type.typeId()) {
+ case BOOLEAN:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BOOLEAN, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case INTEGER:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.INT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case LONG:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.LONG, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case FLOAT:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.FLOAT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case DOUBLE:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DOUBLE, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case DATE:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DATE, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case TIME:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.INT, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case TIMESTAMP:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.TIMESTAMP, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case STRING:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.STRING, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case UUID:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case FIXED:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case BINARY:
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.BINARY, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.empty(), Optional.empty(), attributes));
+ case DECIMAL:
+ Types.DecimalType decimalType = (Types.DecimalType) type;
+ return ImmutableList.of(new OrcType(OrcType.OrcTypeKind.DECIMAL, ImmutableList.of(), ImmutableList.of(), Optional.empty(), Optional.of(decimalType.precision()), Optional.of(decimalType.scale()), attributes));
+ case STRUCT:
+ return toOrcStructType(nextFieldTypeIndex, (Types.StructType) type, attributes);
+ case LIST:
+ return toOrcListType(nextFieldTypeIndex, (Types.ListType) type, attributes);
+ case MAP:
+ return toOrcMapType(nextFieldTypeIndex, (Types.MapType) type, attributes);
+ default:
+ throw new PrestoException(NOT_SUPPORTED, "Unsupported Iceberg type: " + type);
+ }
+ }
+
+ private static List<OrcType> toOrcStructType(int nextFieldTypeIndex, Types.StructType structType, Map<String, String> attributes)
+ {
+ nextFieldTypeIndex++;
+ List<Integer> fieldTypeIndexes = new ArrayList<>();
+ List<String> fieldNames = new ArrayList<>();
+ List<List<OrcType>> fieldTypesList = new ArrayList<>();
+ for (Types.NestedField field : structType.fields()) {
+ fieldTypeIndexes.add(nextFieldTypeIndex);
+ fieldNames.add(field.name());
+ Map<String, String> fieldAttributes = ImmutableMap.<String, String>builder()
+ .put(ORC_ICEBERG_ID_KEY, Integer.toString(field.fieldId()))
+ .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(field.isRequired()))
+ .build();
+ List<OrcType> fieldOrcTypes = toOrcType(nextFieldTypeIndex, field.type(), fieldAttributes);
+ fieldTypesList.add(fieldOrcTypes);
+ nextFieldTypeIndex += fieldOrcTypes.size();
+ }
+
+ ImmutableList.Builder<OrcType> orcTypes = ImmutableList.builder();
+ orcTypes.add(new OrcType(
+ OrcType.OrcTypeKind.STRUCT,
+ fieldTypeIndexes,
+ fieldNames,
+ Optional.empty(),
+ Optional.empty(),
+ Optional.empty(),
+ attributes));
+ fieldTypesList.forEach(orcTypes::addAll);
+
+ return orcTypes.build();
+ }
+
+ private static List<OrcType> toOrcListType(int nextFieldTypeIndex, Types.ListType listType, Map<String, String> attributes)
+ {
+ nextFieldTypeIndex++;
+ Map<String, String> elementAttributes = ImmutableMap.<String, String>builder()
+ .put(ORC_ICEBERG_ID_KEY, Integer.toString(listType.elementId()))
+ .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(listType.isElementRequired()))
+ .build();
+ List<OrcType> itemTypes = toOrcType(nextFieldTypeIndex, listType.elementType(), elementAttributes);
+
+ List<OrcType> orcTypes = new ArrayList<>();
+ orcTypes.add(new OrcType(
+ OrcType.OrcTypeKind.LIST,
+ ImmutableList.of(nextFieldTypeIndex),
+ ImmutableList.of("item"),
+ Optional.empty(),
+ Optional.empty(),
+ Optional.empty(),
+ attributes));
+
+ orcTypes.addAll(itemTypes);
+ return orcTypes;
+ }
+
+ private static List<OrcType> toOrcMapType(int nextFieldTypeIndex, Types.MapType mapType, Map<String, String> attributes)
+ {
+ nextFieldTypeIndex++;
+ Map<String, String> keyAttributes = ImmutableMap.<String, String>builder()
+ .put(ORC_ICEBERG_ID_KEY, Integer.toString(mapType.keyId()))
+ .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(true))
+ .build();
+ List<OrcType> keyTypes = toOrcType(nextFieldTypeIndex, mapType.keyType(), keyAttributes);
+ Map<String, String> valueAttributes = ImmutableMap.<String, String>builder()
+ .put(ORC_ICEBERG_ID_KEY, Integer.toString(mapType.valueId()))
+ .put(ORC_ICEBERG_REQUIRED_KEY, Boolean.toString(mapType.isValueRequired()))
+ .build();
+ List<OrcType> valueTypes = toOrcType(nextFieldTypeIndex + keyTypes.size(), mapType.valueType(), valueAttributes);
+
+ List<OrcType> orcTypes = new ArrayList<>();
+ orcTypes.add(new OrcType(
+ OrcType.OrcTypeKind.MAP,
+ ImmutableList.of(nextFieldTypeIndex, nextFieldTypeIndex + keyTypes.size()),
+ ImmutableList.of("key", "value"),
+ Optional.empty(),
+ Optional.empty(),
+ Optional.empty(),
+ attributes));
+
+ orcTypes.addAll(keyTypes);
+ orcTypes.addAll(valueTypes);
+ return orcTypes;
+ }
}
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java
new file mode 100644
index 0000000000000..9b51febe10b07
--- /dev/null
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergMetadataListing.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.Session;
+import com.facebook.presto.hive.HivePlugin;
+import com.facebook.presto.spi.security.Identity;
+import com.facebook.presto.spi.security.SelectedRole;
+import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.tests.AbstractTestQueryFramework;
+import com.facebook.presto.tests.DistributedQueryRunner;
+import com.google.common.collect.ImmutableMap;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
+import static com.facebook.presto.spi.security.SelectedRole.Type.ROLE;
+import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
+
+public class TestIcebergMetadataListing
+ extends AbstractTestQueryFramework
+{
+ @Override
+ protected QueryRunner createQueryRunner()
+ throws Exception
+ {
+ Session session = testSessionBuilder()
+ .setIdentity(new Identity(
+ "hive",
+ Optional.empty(),
+ ImmutableMap.of("hive", new SelectedRole(ROLE, Optional.of("admin"))),
+ ImmutableMap.of(),
+ ImmutableMap.of()))
+ .build();
+ DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build();
+
+ Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");
+
+ queryRunner.installPlugin(new IcebergPlugin());
+ Map<String, String> icebergProperties = ImmutableMap.<String, String>builder()
+ .put("hive.metastore", "file")
+ .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
+ .build();
+
+ queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
+
+ queryRunner.installPlugin(new HivePlugin("hive"));
+ Map<String, String> hiveProperties = ImmutableMap.<String, String>builder()
+ .put("hive.metastore", "file")
+ .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
+ .put("hive.security", "sql-standard")
+ .build();
+
+ queryRunner.createCatalog("hive", "hive", hiveProperties);
+
+ return queryRunner;
+ }
+
+ @BeforeClass
+ public void setUp()
+ {
+ assertQuerySucceeds("CREATE SCHEMA hive.test_schema");
+ assertQuerySucceeds("CREATE TABLE iceberg.test_schema.iceberg_table1 (_string VARCHAR, _integer INTEGER)");
+ assertQuerySucceeds("CREATE TABLE iceberg.test_schema.iceberg_table2 (_double DOUBLE) WITH (partitioning = ARRAY['_double'])");
+ assertQuerySucceeds("CREATE TABLE hive.test_schema.hive_table (_double DOUBLE)");
+ }
+
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ assertQuerySucceeds("DROP TABLE IF EXISTS hive.test_schema.hive_table");
+ assertQuerySucceeds("DROP TABLE IF EXISTS iceberg.test_schema.iceberg_table2");
+ assertQuerySucceeds("DROP TABLE IF EXISTS iceberg.test_schema.iceberg_table1");
+ assertQuerySucceeds("DROP SCHEMA IF EXISTS hive.test_schema");
+ }
+
+ @Test
+ public void testTableListing()
+ {
+ // For now, iceberg connector will show all the tables(iceberg and non-iceberg) under a schema.
+ assertQuery("SHOW TABLES FROM iceberg.test_schema", "VALUES 'iceberg_table1', 'iceberg_table2', 'hive_table'");
+ }
+
+ @Test
+ public void testTableColumnListing()
+ {
+ // Verify information_schema.columns does not include columns from non-Iceberg tables
+ assertQuery("SELECT table_name, column_name FROM iceberg.information_schema.columns WHERE table_schema = 'test_schema'",
+ "VALUES ('iceberg_table1', '_string'), ('iceberg_table1', '_integer'), ('iceberg_table2', '_double')");
+ }
+
+ @Test
+ public void testTableDescribing()
+ {
+ assertQuery("DESCRIBE iceberg.test_schema.iceberg_table1", "VALUES ('_string', 'varchar', '', ''), ('_integer', 'integer', '', '')");
+ }
+
+ @Test
+ public void testTableValidation()
+ {
+ assertQuerySucceeds("SELECT * FROM iceberg.test_schema.iceberg_table1");
+ assertQueryFails("SELECT * FROM iceberg.test_schema.hive_table", "Not an Iceberg table: test_schema.hive_table");
+ }
+}
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergOrcMetricsCollection.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergOrcMetricsCollection.java
new file mode 100644
index 0000000000000..3f7acf7f413f1
--- /dev/null
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergOrcMetricsCollection.java
@@ -0,0 +1,297 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.Session;
+import com.facebook.presto.testing.MaterializedResult;
+import com.facebook.presto.testing.MaterializedRow;
+import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.tests.AbstractTestQueryFramework;
+import com.facebook.presto.tests.DistributedQueryRunner;
+import com.facebook.presto.tpch.TpchPlugin;
+import com.google.common.collect.ImmutableMap;
+import org.testng.annotations.Test;
+
+import java.nio.file.Path;
+import java.util.Map;
+
+import static com.facebook.presto.SystemSessionProperties.MAX_DRIVERS_PER_TASK;
+import static com.facebook.presto.SystemSessionProperties.TASK_CONCURRENCY;
+import static com.facebook.presto.SystemSessionProperties.TASK_WRITER_COUNT;
+import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
+import static com.facebook.presto.iceberg.TestIcebergOrcMetricsCollection.DataFileRecord.toDataFileRecord;
+import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+public class TestIcebergOrcMetricsCollection
+ extends AbstractTestQueryFramework
+{
+ @Override
+ protected QueryRunner createQueryRunner()
+ throws Exception
+ {
+ Session session = testSessionBuilder()
+ .setCatalog(ICEBERG_CATALOG)
+ .setSchema("test_schema")
+ .setSystemProperty(TASK_CONCURRENCY, "1")
+ .setSystemProperty(TASK_WRITER_COUNT, "1")
+ .setSystemProperty(MAX_DRIVERS_PER_TASK, "1")
+ .setCatalogSessionProperty(ICEBERG_CATALOG, "orc_string_statistics_limit", Integer.MAX_VALUE + "B")
+ .build();
+ DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session)
+ .setNodeCount(1)
+ .build();
+
+ queryRunner.installPlugin(new TpchPlugin());
+ queryRunner.createCatalog("tpch", "tpch");
+
+ Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");
+
+ queryRunner.installPlugin(new IcebergPlugin());
+ Map<String, String> icebergProperties = ImmutableMap.<String, String>builder()
+ .put("hive.metastore", "file")
+ .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
+ .build();
+
+ queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
+
+ queryRunner.execute("CREATE SCHEMA test_schema");
+
+ return queryRunner;
+ }
+
+ @Test
+ public void testBasic()
+ {
+ assertUpdate("CREATE TABLE orders WITH (format = 'ORC') AS SELECT * FROM tpch.tiny.orders", 15000);
+ MaterializedResult materializedResult = computeActual("SELECT * FROM \"orders$files\"");
+ assertEquals(materializedResult.getRowCount(), 1);
+ DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));
+
+ // Check file format
+ assertEquals(datafile.getFileFormat(), "ORC");
+
+ // Check file row count
+ assertEquals(datafile.getRecordCount(), 15000L);
+
+ // Check per-column value count
+ datafile.getValueCounts().values().forEach(valueCount -> assertEquals(valueCount, (Long) 15000L));
+
+ // Check per-column null value count
+ datafile.getNullValueCounts().values().forEach(nullValueCount -> assertEquals(nullValueCount, (Long) 0L));
+
+ // Check per-column lower bound
+ Map<Integer, String> lowerBounds = datafile.getLowerBounds();
+ assertQuery("SELECT min(orderkey) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(1));
+ assertQuery("SELECT min(custkey) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(2));
+ assertQuery("SELECT min(orderstatus) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(3) + "'");
+ assertQuery("SELECT min(totalprice) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(4));
+ assertQuery("SELECT min(orderdate) FROM tpch.tiny.orders", "VALUES DATE '" + lowerBounds.get(5) + "'");
+ assertQuery("SELECT min(orderpriority) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(6) + "'");
+ assertQuery("SELECT min(clerk) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(7) + "'");
+ assertQuery("SELECT min(shippriority) FROM tpch.tiny.orders", "VALUES " + lowerBounds.get(8));
+ assertQuery("SELECT min(comment) FROM tpch.tiny.orders", "VALUES '" + lowerBounds.get(9) + "'");
+
+ // Check per-column upper bound
+ Map<Integer, String> upperBounds = datafile.getUpperBounds();
+ assertQuery("SELECT max(orderkey) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(1));
+ assertQuery("SELECT max(custkey) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(2));
+ assertQuery("SELECT max(orderstatus) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(3) + "'");
+ assertQuery("SELECT max(totalprice) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(4));
+ assertQuery("SELECT max(orderdate) FROM tpch.tiny.orders", "VALUES DATE '" + upperBounds.get(5) + "'");
+ assertQuery("SELECT max(orderpriority) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(6) + "'");
+ assertQuery("SELECT max(clerk) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(7) + "'");
+ assertQuery("SELECT max(shippriority) FROM tpch.tiny.orders", "VALUES " + upperBounds.get(8));
+ assertQuery("SELECT max(comment) FROM tpch.tiny.orders", "VALUES '" + upperBounds.get(9) + "'");
+
+ assertUpdate("DROP TABLE orders");
+ }
+
+ @Test
+ public void testWithNulls()
+ {
+ assertUpdate("CREATE TABLE test_with_nulls (_integer INTEGER, _real REAL, _string VARCHAR) WITH (format = 'ORC')");
+ assertUpdate("INSERT INTO test_with_nulls VALUES (7, 3.4, 'aaa'), (3, 4.5, 'bbb'), (4, null, 'ccc'), (null, null, 'ddd')", 4);
+ MaterializedResult materializedResult = computeActual("SELECT * FROM \"test_with_nulls$files\"");
+ assertEquals(materializedResult.getRowCount(), 1);
+ DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));
+
+ // Check per-column value count
+ datafile.getValueCounts().values().forEach(valueCount -> assertEquals(valueCount, (Long) 4L));
+
+ // Check per-column null value count
+ assertEquals(datafile.getNullValueCounts().get(1), (Long) 1L);
+ assertEquals(datafile.getNullValueCounts().get(2), (Long) 2L);
+ assertEquals(datafile.getNullValueCounts().get(3), (Long) 0L);
+
+ // Check per-column lower bound
+ assertEquals(datafile.getLowerBounds().get(1), "3");
+ assertEquals(datafile.getLowerBounds().get(2), "3.4");
+ assertEquals(datafile.getLowerBounds().get(3), "aaa");
+
+ assertUpdate("DROP TABLE test_with_nulls");
+
+ assertUpdate("CREATE TABLE test_all_nulls (_integer INTEGER) WITH (format = 'ORC')");
+ assertUpdate("INSERT INTO test_all_nulls VALUES null, null, null", 3);
+ materializedResult = computeActual("SELECT * FROM \"test_all_nulls$files\"");
+ assertEquals(materializedResult.getRowCount(), 1);
+ datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));
+
+ // Check per-column value count
+ assertEquals(datafile.getValueCounts().get(1), (Long) 3L);
+
+ // Check per-column null value count
+ assertEquals(datafile.getNullValueCounts().get(1), (Long) 3L);
+
+ // Check that lower bounds and upper bounds are nulls. (There's no non-null record)
+ assertNull(datafile.getLowerBounds());
+ assertNull(datafile.getUpperBounds());
+
+ assertUpdate("DROP TABLE test_all_nulls");
+ }
+
+ @Test
+ public void testNestedTypes()
+ {
+ assertUpdate("CREATE TABLE test_nested_types (col1 INTEGER, col2 ROW (f1 INTEGER, f2 ARRAY(INTEGER), f3 DOUBLE)) WITH (format = 'ORC')");
+ assertUpdate("INSERT INTO test_nested_types VALUES " +
+ "(7, ROW(3, ARRAY[10, 11, 19], 1.9)), " +
+ "(-9, ROW(4, ARRAY[13, 16, 20], -2.9)), " +
+ "(8, ROW(0, ARRAY[14, 17, 21], 3.9)), " +
+ "(3, ROW(10, ARRAY[15, 18, 22], 4.9))", 4);
+ MaterializedResult materializedResult = computeActual("SELECT * FROM \"test_nested_types$files\"");
+ assertEquals(materializedResult.getRowCount(), 1);
+ DataFileRecord datafile = toDataFileRecord(materializedResult.getMaterializedRows().get(0));
+
+ Map<Integer, String> lowerBounds = datafile.getLowerBounds();
+ Map<Integer, String> upperBounds = datafile.getUpperBounds();
+
+ // Only
+ // 1. top-level primitive columns
+ // 2. and nested primitive fields that are not descendants of LISTs or MAPs
+ // should appear in lowerBounds or UpperBounds
+ assertEquals(lowerBounds.size(), 3);
+ assertEquals(upperBounds.size(), 3);
+
+ // col1
+ assertEquals(lowerBounds.get(1), "-9");
+ assertEquals(upperBounds.get(1), "8");
+
+ // col2.f1 (key in lowerBounds/upperBounds is Iceberg ID)
+ assertEquals(lowerBounds.get(3), "0");
+ assertEquals(upperBounds.get(3), "10");
+
+ // col2.f3 (key in lowerBounds/upperBounds is Iceberg ID)
+ assertEquals(lowerBounds.get(5), "-2.9");
+ assertEquals(upperBounds.get(5), "4.9");
+
+ assertUpdate("DROP TABLE test_nested_types");
+ }
+
+ public static class DataFileRecord
+ {
+ private final String filePath;
+ private final String fileFormat;
+ private final long recordCount;
+ private final long fileSizeInBytes;
+ private final Map<Integer, Long> columnSizes;
+ private final Map<Integer, Long> valueCounts;
+ private final Map<Integer, Long> nullValueCounts;
+ private final Map<Integer, String> lowerBounds;
+ private final Map<Integer, String> upperBounds;
+
+ public static DataFileRecord toDataFileRecord(MaterializedRow row)
+ {
+ assertEquals(row.getFieldCount(), 11);
+ return new DataFileRecord(
+ (String) row.getField(0),
+ (String) row.getField(1),
+ (long) row.getField(2),
+ (long) row.getField(3),
+ row.getField(4) != null ? ImmutableMap.copyOf((Map<Integer, Long>) row.getField(4)) : null,
+ row.getField(5) != null ? ImmutableMap.copyOf((Map<Integer, Long>) row.getField(5)) : null,
+ row.getField(6) != null ? ImmutableMap.copyOf((Map<Integer, Long>) row.getField(6)) : null,
+ row.getField(7) != null ? ImmutableMap.copyOf((Map<Integer, String>) row.getField(7)) : null,
+ row.getField(8) != null ? ImmutableMap.copyOf((Map<Integer, String>) row.getField(8)) : null);
+ }
+
+ private DataFileRecord(
+ String filePath,
+ String fileFormat,
+ long recordCount,
+ long fileSizeInBytes,
+ Map<Integer, Long> columnSizes,
+ Map<Integer, Long> valueCounts,
+ Map<Integer, Long> nullValueCounts,
+ Map<Integer, String> lowerBounds,
+ Map<Integer, String> upperBounds)
+ {
+ this.filePath = filePath;
+ this.fileFormat = fileFormat;
+ this.recordCount = recordCount;
+ this.fileSizeInBytes = fileSizeInBytes;
+ this.columnSizes = columnSizes;
+ this.valueCounts = valueCounts;
+ this.nullValueCounts = nullValueCounts;
+ this.lowerBounds = lowerBounds;
+ this.upperBounds = upperBounds;
+ }
+
+ public String getFilePath()
+ {
+ return filePath;
+ }
+
+ public String getFileFormat()
+ {
+ return fileFormat;
+ }
+
+ public long getRecordCount()
+ {
+ return recordCount;
+ }
+
+ public long getFileSizeInBytes()
+ {
+ return fileSizeInBytes;
+ }
+
+ public Map<Integer, Long> getColumnSizes()
+ {
+ return columnSizes;
+ }
+
+ public Map<Integer, Long> getValueCounts()
+ {
+ return valueCounts;
+ }
+
+ public Map<Integer, Long> getNullValueCounts()
+ {
+ return nullValueCounts;
+ }
+
+ public Map<Integer, String> getLowerBounds()
+ {
+ return lowerBounds;
+ }
+
+ public Map<Integer, String> getUpperBounds()
+ {
+ return upperBounds;
+ }
+ }
+}
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSmoke.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSmoke.java
index 6fbbcfc150d77..4efaba360c57e 100644
--- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSmoke.java
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSmoke.java
@@ -13,16 +13,35 @@
*/
package com.facebook.presto.iceberg;
+import com.facebook.presto.Session;
+import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.testing.assertions.Assert;
import com.facebook.presto.tests.AbstractTestIntegrationSmokeTest;
import com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.FileFormat;
+import org.intellij.lang.annotations.Language;
import org.testng.annotations.Test;
+import java.util.function.BiConsumer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.iceberg.IcebergQueryRunner.createIcebergQueryRunner;
+import static com.facebook.presto.testing.MaterializedResult.resultBuilder;
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.collect.Iterables.getOnlyElement;
+import static java.lang.String.format;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
public class TestIcebergSmoke
extends AbstractTestIntegrationSmokeTest
{
+ private static final Pattern WITH_CLAUSE_EXTRACTER = Pattern.compile(".*(WITH\\s*\\([^)]*\\))\\s*$", Pattern.DOTALL);
+
@Override
protected QueryRunner createQueryRunner()
throws Exception
@@ -30,33 +49,633 @@ protected QueryRunner createQueryRunner()
return createIcebergQueryRunner(ImmutableMap.of());
}
+ @Test
+ public void testTimestamp()
+ {
+ // TODO
+ }
+
+ @Test
@Override
- protected boolean isParameterizedVarcharSupported()
+ public void testDescribeTable()
+ {
+ MaterializedResult expectedColumns = resultBuilder(getQueryRunner().getDefaultSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR)
+ .row("orderkey", "bigint", "", "")
+ .row("custkey", "bigint", "", "")
+ .row("orderstatus", "varchar", "", "")
+ .row("totalprice", "double", "", "")
+ .row("orderdate", "date", "", "")
+ .row("orderpriority", "varchar", "", "")
+ .row("clerk", "varchar", "", "")
+ .row("shippriority", "integer", "", "")
+ .row("comment", "varchar", "", "")
+ .build();
+ MaterializedResult actualColumns = computeActual("DESCRIBE orders");
+ Assert.assertEquals(actualColumns, expectedColumns);
+ }
+ @Test
+ public void testShowCreateTable()
{
- return false;
+ assertThat(computeActual("SHOW CREATE TABLE orders").getOnlyValue())
+ .isEqualTo("CREATE TABLE iceberg.tpch.orders (\n" +
+ " orderkey bigint,\n" +
+ " custkey bigint,\n" +
+ " orderstatus varchar,\n" +
+ " totalprice double,\n" +
+ " orderdate date,\n" +
+ " orderpriority varchar,\n" +
+ " clerk varchar,\n" +
+ " shippriority integer,\n" +
+ " comment varchar\n" +
+ ")\n" +
+ "WITH (\n" +
+ " format = 'ORC'\n" +
+ ")");
}
@Test
public void testDecimal()
+ {
+ testWithAllFileFormats((session, format) -> testDecimalForFormat(session, format));
+ }
+
+ private void testDecimalForFormat(Session session, FileFormat format)
+ {
+ testDecimalWithPrecisionAndScale(session, format, 1, 0);
+ testDecimalWithPrecisionAndScale(session, format, 8, 6);
+ testDecimalWithPrecisionAndScale(session, format, 9, 8);
+ testDecimalWithPrecisionAndScale(session, format, 10, 8);
+
+ testDecimalWithPrecisionAndScale(session, format, 18, 1);
+ testDecimalWithPrecisionAndScale(session, format, 18, 8);
+ testDecimalWithPrecisionAndScale(session, format, 18, 17);
+
+ testDecimalWithPrecisionAndScale(session, format, 17, 16);
+ testDecimalWithPrecisionAndScale(session, format, 18, 17);
+ testDecimalWithPrecisionAndScale(session, format, 24, 10);
+ testDecimalWithPrecisionAndScale(session, format, 30, 10);
+ testDecimalWithPrecisionAndScale(session, format, 37, 26);
+ testDecimalWithPrecisionAndScale(session, format, 38, 37);
+
+ testDecimalWithPrecisionAndScale(session, format, 38, 17);
+ testDecimalWithPrecisionAndScale(session, format, 38, 37);
+ }
+
+ private void testDecimalWithPrecisionAndScale(Session session, FileFormat format, int precision, int scale)
+ {
+ checkArgument(precision >= 1 && precision <= 38, "Decimal precision (%s) must be between 1 and 38 inclusive", precision);
+ checkArgument(scale < precision && scale >= 0, "Decimal scale (%s) must be less than the precision (%s) and non-negative", scale, precision);
+
+ String tableName = format("test_decimal_p%d_s%d", precision, scale);
+ String decimalType = format("DECIMAL(%d,%d)", precision, scale);
+ String beforeTheDecimalPoint = "12345678901234567890123456789012345678".substring(0, precision - scale);
+ String afterTheDecimalPoint = "09876543210987654321098765432109876543".substring(0, scale);
+ String decimalValue = format("%s.%s", beforeTheDecimalPoint, afterTheDecimalPoint);
+
+ assertUpdate(session, format("CREATE TABLE %s (x %s) WITH (format = '%s')", tableName, decimalType, format.name()));
+ assertUpdate(session, format("INSERT INTO %s (x) VALUES (CAST('%s' AS %s))", tableName, decimalValue, decimalType), 1);
+ assertQuery(session, format("SELECT * FROM %s", tableName), format("SELECT CAST('%s' AS %s)", decimalValue, decimalType));
+ dropTable(session, tableName);
+ }
+
+ @Test
+ public void testParquetPartitionByTimestamp()
{
// TODO
}
@Test
- public void testTimestamp()
+ public void testParquetSelectByTimestamp()
{
// TODO
}
@Test
- public void testCreatePartitionedTable()
+ public void testOrcPartitionByTimestamp()
{
// TODO
}
+ @Test
+ public void testOrcSelectByTimestamp()
+ {
+ // TODO
+ }
+
+ @Test
+ public void testCreatePartitionedTable()
+ {
+ testWithAllFileFormats(this::testCreatePartitionedTable);
+ }
+
+ private void testCreatePartitionedTable(Session session, FileFormat fileFormat)
+ {
+ @Language("SQL") String createTable = "" +
+ "CREATE TABLE test_partitioned_table (" +
+ " _string VARCHAR" +
+ ", _bigint BIGINT" +
+ ", _integer INTEGER" +
+ ", _real REAL" +
+ ", _double DOUBLE" +
+ ", _boolean BOOLEAN" +
+ ", _decimal_short DECIMAL(3,2)" +
+ ", _decimal_long DECIMAL(30,10)" +
+ ", _date DATE" +
+ ") " +
+ "WITH (" +
+ "format = '" + fileFormat + "', " +
+ "partitioning = ARRAY[" +
+ " '_string'," +
+ " '_integer'," +
+ " '_bigint'," +
+ " '_boolean'," +
+ " '_real'," +
+ " '_double'," +
+ " '_decimal_short', " +
+ " '_decimal_long'," +
+ " '_date']" +
+ ")";
+
+ assertUpdate(session, createTable);
+
+ MaterializedResult result = computeActual("SELECT * from test_partitioned_table");
+ assertEquals(result.getRowCount(), 0);
+
+ @Language("SQL") String select = "" +
+ "SELECT" +
+ " 'foo' _string" +
+ ", CAST(123 AS BIGINT) _bigint" +
+ ", 456 _integer" +
+ ", CAST('123.45' AS REAL) _real" +
+ ", CAST('3.14' AS DOUBLE) _double" +
+ ", true _boolean" +
+ ", CAST('3.14' AS DECIMAL(3,2)) _decimal_short" +
+ ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _decimal_long" +
+ ", CAST('2017-05-01' AS DATE) _date";
+
+ assertUpdate(session, "INSERT INTO test_partitioned_table " + select, 1);
+ assertQuery(session, "SELECT * from test_partitioned_table", select);
+ assertQuery(session, "" +
+ "SELECT * FROM test_partitioned_table WHERE" +
+ " 'foo' = _string" +
+ " AND 456 = _integer" +
+ " AND CAST(123 AS BIGINT) = _bigint" +
+ " AND true = _boolean" +
+ " AND CAST('3.14' AS DECIMAL(3,2)) = _decimal_short" +
+ " AND CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) = _decimal_long" +
+ " AND CAST('2017-05-01' AS DATE) = _date",
+ select);
+
+ dropTable(session, "test_partitioned_table");
+ }
+
+ @Test
+ public void testCreatePartitionedTableWithNestedTypes()
+ {
+ testWithAllFileFormats(this::testCreatePartitionedTableWithNestedTypes);
+ }
+
+ private void testCreatePartitionedTableWithNestedTypes(Session session, FileFormat fileFormat)
+ {
+ @Language("SQL") String createTable = "" +
+ "CREATE TABLE test_partitioned_table_nested_type (" +
+ " _string VARCHAR" +
+ ", _struct ROW(_field1 INT, _field2 VARCHAR)" +
+ ", _date DATE" +
+ ") " +
+ "WITH (" +
+ "format = '" + fileFormat + "', " +
+ "partitioning = ARRAY['_date']" +
+ ")";
+
+ assertUpdate(session, createTable);
+
+ dropTable(session, "test_partitioned_table_nested_type");
+ }
+
+ @Test
+ public void testPartitionedTableWithNullValues()
+ {
+ testWithAllFileFormats(this::testPartitionedTableWithNullValues);
+ }
+
+ private void testPartitionedTableWithNullValues(Session session, FileFormat fileFormat)
+ {
+ @Language("SQL") String createTable = "" +
+ "CREATE TABLE test_partitioned_table_with_null_values (" +
+ " _string VARCHAR" +
+ ", _bigint BIGINT" +
+ ", _integer INTEGER" +
+ ", _real REAL" +
+ ", _double DOUBLE" +
+ ", _boolean BOOLEAN" +
+ ", _decimal_short DECIMAL(3,2)" +
+ ", _decimal_long DECIMAL(30,10)" +
+ ", _date DATE" +
+ ") " +
+ "WITH (" +
+ "format = '" + fileFormat + "', " +
+ "partitioning = ARRAY[" +
+ " '_string'," +
+ " '_integer'," +
+ " '_bigint'," +
+ " '_boolean'," +
+ " '_real'," +
+ " '_double'," +
+ " '_decimal_short', " +
+ " '_decimal_long'," +
+ " '_date']" +
+ ")";
+
+ assertUpdate(session, createTable);
+
+ MaterializedResult result = computeActual("SELECT * from test_partitioned_table_with_null_values");
+ assertEquals(result.getRowCount(), 0);
+
+ @Language("SQL") String select = "" +
+ "SELECT" +
+ " null _string" +
+ ", null _bigint" +
+ ", null _integer" +
+ ", null _real" +
+ ", null _double" +
+ ", null _boolean" +
+ ", null _decimal_short" +
+ ", null _decimal_long" +
+ ", null _date";
+
+ assertUpdate(session, "INSERT INTO test_partitioned_table_with_null_values " + select, 1);
+ assertQuery(session, "SELECT * from test_partitioned_table_with_null_values", select);
+ dropTable(session, "test_partitioned_table_with_null_values");
+ }
+
@Test
public void testCreatePartitionedTableAs()
+ {
+ testWithAllFileFormats(this::testCreatePartitionedTableAs);
+ }
+
+ private void testCreatePartitionedTableAs(Session session, FileFormat fileFormat)
+ {
+ @Language("SQL") String createTable = "" +
+ "CREATE TABLE test_create_partitioned_table_as " +
+ "WITH (" +
+ "format = '" + fileFormat + "', " +
+ "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(order_key,9)']" +
+ ") " +
+ "AS " +
+ "SELECT orderkey AS order_key, shippriority AS ship_priority, orderstatus AS order_status " +
+ "FROM tpch.tiny.orders";
+
+ assertUpdate(session, createTable, "SELECT count(*) from orders");
+
+ String createTableSql = format("" +
+ "CREATE TABLE %s.%s.%s (\n" +
+ " \"order_key\" bigint,\n" +
+ " \"ship_priority\" integer,\n" +
+ " \"order_status\" varchar\n" +
+ ")\n" +
+ "WITH (\n" +
+ " format = '" + fileFormat + "',\n" +
+ " partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" +
+ ")",
+ getSession().getCatalog().get(),
+ getSession().getSchema().get(),
+ "test_create_partitioned_table_as");
+
+ MaterializedResult actualResult = computeActual("SHOW CREATE TABLE test_create_partitioned_table_as");
+ assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql);
+
+ assertQuery(session, "SELECT * from test_create_partitioned_table_as", "SELECT orderkey, shippriority, orderstatus FROM orders");
+
+ dropTable(session, "test_create_partitioned_table_as");
+ }
+
+ @Test
+ public void testColumnComments()
+ {
+ Session session = getSession();
+ assertUpdate(session, "CREATE TABLE test_column_comments (_bigint BIGINT COMMENT 'test column comment')");
+
+ assertQuery(session, "SHOW COLUMNS FROM test_column_comments",
+ "VALUES ('_bigint', 'bigint', '', 'test column comment')");
+
+ dropTable(session, "test_column_comments");
+ }
+
+ @Test
+ public void testTableComments()
+ {
+ Session session = getSession();
+ String createTableTemplate = "" +
+ "CREATE TABLE iceberg.tpch.test_table_comments (\n" +
+ " \"_x\" bigint\n" +
+ ")\n" +
+ "COMMENT '%s'\n" +
+ "WITH (\n" +
+ " format = 'ORC'\n" +
+ ")";
+ String createTableSql = format(createTableTemplate, "test table comment");
+ assertUpdate(createTableSql);
+ MaterializedResult resultOfCreate = computeActual("SHOW CREATE TABLE test_table_comments");
+ assertEquals(getOnlyElement(resultOfCreate.getOnlyColumnAsSet()), createTableSql);
+
+ dropTable(session, "test_table_comments");
+ }
+
+ @Test
+ public void testRollbackSnapshot()
+ {
+ Session session = getSession();
+ MaterializedResult result = computeActual("SHOW SCHEMAS FROM system");
+ assertUpdate(session, "CREATE TABLE test_rollback (col0 INTEGER, col1 BIGINT)");
+ long afterCreateTableId = getLatestSnapshotId();
+
+ assertUpdate(session, "INSERT INTO test_rollback (col0, col1) VALUES (123, CAST(987 AS BIGINT))", 1);
+ long afterFirstInsertId = getLatestSnapshotId();
+
+ assertUpdate(session, "INSERT INTO test_rollback (col0, col1) VALUES (456, CAST(654 AS BIGINT))", 1);
+ assertQuery(session, "SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT)), (456, CAST(654 AS BIGINT))");
+
+ assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterFirstInsertId));
+ assertQuery(session, "SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT))");
+
+ assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterCreateTableId));
+ assertEquals((long) computeActual(session, "SELECT COUNT(*) FROM test_rollback").getOnlyValue(), 0);
+
+ dropTable(session, "test_rollback");
+ }
+
+ private long getLatestSnapshotId()
+ {
+ return (long) computeActual("SELECT snapshot_id FROM \"test_rollback$snapshots\" ORDER BY committed_at DESC LIMIT 1")
+ .getOnlyValue();
+ }
+
+ @Test
+ public void testInsertIntoNotNullColumn()
+ {
+ // TODO: To support non-null column. (NOT_NULL_COLUMN_CONSTRAINT)
+ }
+
+ @Test
+ public void testSchemaEvolution()
+ {
+ // TODO: Support schema evolution for PARQUET. Schema evolution should be id based.
+ testSchemaEvolution(getSession(), FileFormat.ORC);
+ }
+
+ private void testSchemaEvolution(Session session, FileFormat fileFormat)
+ {
+ assertUpdate(session, "CREATE TABLE test_schema_evolution_drop_end (col0 INTEGER, col1 INTEGER, col2 INTEGER) WITH (format = '" + fileFormat + "')");
+ assertUpdate(session, "INSERT INTO test_schema_evolution_drop_end VALUES (0, 1, 2)", 1);
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, 2)");
+ assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_end DROP COLUMN col2");
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1)");
+ assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_end ADD COLUMN col2 INTEGER");
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, NULL)");
+ assertUpdate(session, "INSERT INTO test_schema_evolution_drop_end VALUES (3, 4, 5)", 1);
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, NULL), (3, 4, 5)");
+ dropTable(session, "test_schema_evolution_drop_end");
+
+ assertUpdate(session, "CREATE TABLE test_schema_evolution_drop_middle (col0 INTEGER, col1 INTEGER, col2 INTEGER) WITH (format = '" + fileFormat + "')");
+ assertUpdate(session, "INSERT INTO test_schema_evolution_drop_middle VALUES (0, 1, 2)", 1);
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 1, 2)");
+ assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_middle DROP COLUMN col1");
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 2)");
+ assertUpdate(session, "ALTER TABLE test_schema_evolution_drop_middle ADD COLUMN col1 INTEGER");
+ assertUpdate(session, "INSERT INTO test_schema_evolution_drop_middle VALUES (3, 4, 5)", 1);
+ assertQuery(session, "SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 2, NULL), (3, 4, 5)");
+ dropTable(session, "test_schema_evolution_drop_middle");
+ }
+
+ @Test
+ public void testCreateTableLike()
+ {
+ Session session = getSession();
+ assertUpdate(session, "CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = 'PARQUET', partitioning = ARRAY['aDate'])");
+ assertEquals(getTablePropertiesString("test_create_table_like_original"), "WITH (\n" +
+ " format = 'PARQUET',\n" +
+ " partitioning = ARRAY['adate']\n" +
+ ")");
+
+ assertUpdate(session, "CREATE TABLE test_create_table_like_copy0 (LIKE test_create_table_like_original, col2 INTEGER)");
+ assertUpdate(session, "INSERT INTO test_create_table_like_copy0 (col1, aDate, col2) VALUES (1, CAST('1950-06-28' AS DATE), 3)", 1);
+ assertQuery(session, "SELECT * from test_create_table_like_copy0", "VALUES(1, CAST('1950-06-28' AS DATE), 3)");
+ dropTable(session, "test_create_table_like_copy0");
+
+ assertUpdate(session, "CREATE TABLE test_create_table_like_copy1 (LIKE test_create_table_like_original)");
+ assertEquals(getTablePropertiesString("test_create_table_like_copy1"), "WITH (\n" +
+ " format = 'PARQUET'\n" +
+ ")");
+ dropTable(session, "test_create_table_like_copy1");
+
+ assertUpdate(session, "CREATE TABLE test_create_table_like_copy2 (LIKE test_create_table_like_original EXCLUDING PROPERTIES)");
+ assertEquals(getTablePropertiesString("test_create_table_like_copy2"), "WITH (\n" +
+ " format = 'PARQUET'\n" +
+ ")");
+ dropTable(session, "test_create_table_like_copy2");
+
+ assertUpdate(session, "CREATE TABLE test_create_table_like_copy3 (LIKE test_create_table_like_original INCLUDING PROPERTIES)");
+ assertEquals(getTablePropertiesString("test_create_table_like_copy3"), "WITH (\n" +
+ " format = 'PARQUET',\n" +
+ " partitioning = ARRAY['adate']\n" +
+ ")");
+ dropTable(session, "test_create_table_like_copy3");
+
+ assertUpdate(session, "CREATE TABLE test_create_table_like_copy4 (LIKE test_create_table_like_original INCLUDING PROPERTIES) WITH (format = 'ORC')");
+ assertEquals(getTablePropertiesString("test_create_table_like_copy4"), "WITH (\n" +
+ " format = 'ORC',\n" +
+ " partitioning = ARRAY['adate']\n" +
+ ")");
+ dropTable(session, "test_create_table_like_copy4");
+
+ dropTable(session, "test_create_table_like_original");
+ }
+
+ private String getTablePropertiesString(String tableName)
+ {
+ MaterializedResult showCreateTable = computeActual("SHOW CREATE TABLE " + tableName);
+ String createTable = (String) getOnlyElement(showCreateTable.getOnlyColumnAsSet());
+ Matcher matcher = WITH_CLAUSE_EXTRACTER.matcher(createTable);
+ if (matcher.matches()) {
+ return matcher.group(1);
+ }
+ else {
+ return null;
+ }
+ }
+
+ @Test
+ public void testPredicating()
+ {
+ testWithAllFileFormats(this::testPredicating);
+ }
+
+ private void testPredicating(Session session, FileFormat fileFormat)
+ {
+ assertUpdate(session, "CREATE TABLE test_predicating_on_real (col REAL) WITH (format = '" + fileFormat + "')");
+ assertUpdate(session, "INSERT INTO test_predicating_on_real VALUES 1.2", 1);
+ assertQuery(session, "SELECT * FROM test_predicating_on_real WHERE col = 1.2", "VALUES 1.2");
+ dropTable(session, "test_predicating_on_real");
+ }
+
+ @Test
+ public void testDateTransforms()
{
// TODO
}
+
+ @Test
+ public void testTruncateTransform()
+ {
+ testWithAllFileFormats(this::testTruncateTransformsForFormat);
+ }
+
+ private void testTruncateTransformsForFormat(Session session, FileFormat format)
+ {
+ String select = "SELECT d_trunc, row_count, d.min AS d_min, d.max AS d_max, b.min AS b_min, b.max AS b_max FROM \"test_truncate_transform$partitions\"";
+
+ assertUpdate(session, format("CREATE TABLE test_truncate_transform (d VARCHAR, b BIGINT)" +
+ " WITH (format = '%s', partitioning = ARRAY['truncate(d, 2)'])", format.name()));
+
+ String insertSql = "INSERT INTO test_truncate_transform VALUES" +
+ "('abcd', 1)," +
+ "('abxy', 2)," +
+ "('ab598', 3)," +
+ "('mommy', 4)," +
+ "('moscow', 5)," +
+ "('Greece', 6)," +
+ "('Grozny', 7)";
+ assertUpdate(session, insertSql, 7);
+
+ assertQuery(session, "SELECT COUNT(*) FROM \"test_truncate_transform$partitions\"", "SELECT 3");
+
+ assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'ab'", "SELECT b FROM (VALUES (1), (2), (3)) AS t(b)");
+ assertQuery(session, select + " WHERE d_trunc = 'ab'", "VALUES('ab', 3, 'ab598', 'abxy', 1, 3)");
+
+ assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'mo'", "SELECT b FROM (VALUES (4), (5)) AS t(b)");
+ assertQuery(session, select + " WHERE d_trunc = 'mo'", "VALUES('mo', 2, 'mommy', 'moscow', 4, 5)");
+
+ assertQuery(session, "SELECT b FROM test_truncate_transform WHERE substr(d, 1, 2) = 'Gr'", "SELECT b FROM (VALUES (6), (7)) AS t(b)");
+ assertQuery(session, select + " WHERE d_trunc = 'Gr'", "VALUES('Gr', 2, 'Greece', 'Grozny', 6, 7)");
+
+ dropTable(session, "test_truncate_transform");
+ }
+
+ @Test
+ public void testBucketTransform()
+ {
+ testWithAllFileFormats(this::testBucketTransformsForFormat);
+ }
+
+ private void testBucketTransformsForFormat(Session session, FileFormat format)
+ {
+ String select = "SELECT d_bucket, row_count, d.min AS d_min, d.max AS d_max, b.min AS b_min, b.max AS b_max FROM \"test_bucket_transform$partitions\"";
+
+ assertUpdate(session, format("CREATE TABLE test_bucket_transform (d VARCHAR, b BIGINT)" +
+ " WITH (format = '%s', partitioning = ARRAY['bucket(d, 2)'])", format.name()));
+ String insertSql = "INSERT INTO test_bucket_transform VALUES" +
+ "('abcd', 1)," +
+ "('abxy', 2)," +
+ "('ab598', 3)," +
+ "('mommy', 4)," +
+ "('moscow', 5)," +
+ "('Greece', 6)," +
+ "('Grozny', 7)";
+ assertUpdate(session, insertSql, 7);
+
+ assertQuery(session, "SELECT COUNT(*) FROM \"test_bucket_transform$partitions\"", "SELECT 2");
+
+ assertQuery(session, select + " WHERE d_bucket = 0", "VALUES(0, 3, 'Grozny', 'mommy', 1, 7)");
+
+ assertQuery(session, select + " WHERE d_bucket = 1", "VALUES(1, 4, 'Greece', 'moscow', 2, 6)");
+
+ dropTable(session, "test_bucket_transform");
+ }
+
+ private void testWithAllFileFormats(BiConsumer<Session, FileFormat> test)
+ {
+ test.accept(getSession(), FileFormat.PARQUET);
+ test.accept(getSession(), FileFormat.ORC);
+ }
+
+ private void dropTable(Session session, String table)
+ {
+ assertUpdate(session, "DROP TABLE " + table);
+ assertFalse(getQueryRunner().tableExists(session, table));
+ }
+ @Test
+ public void testCreateNestedPartitionedTable()
+ {
+ testWithAllFileFormats(this::testCreateNestedPartitionedTable);
+ }
+
+ private void testCreateNestedPartitionedTable(Session session, FileFormat fileFormat)
+ {
+ @Language("SQL") String createTable = "" +
+ "CREATE TABLE test_nested_table (" +
+ " bool BOOLEAN" +
+ ", int INTEGER" +
+ ", arr ARRAY(VARCHAR)" +
+ ", big BIGINT" +
+ ", rl REAL" +
+ ", dbl DOUBLE" +
+ ", mp MAP(INTEGER, VARCHAR)" +
+ ", dec DECIMAL(5,2)" +
+ ", vc VARCHAR" +
+ ", vb VARBINARY" +
+ ", str ROW(id INTEGER , vc VARCHAR)" +
+ ", dt DATE)" +
+ " WITH (partitioning = ARRAY['int']," +
+ " format = '" + fileFormat + "'" +
+ ")";
+
+ assertUpdate(session, createTable);
+
+ assertUpdate(session, "INSERT INTO test_nested_table " +
+ " select true, 1, array['uno', 'dos', 'tres'], BIGINT '1', REAL '1.0', DOUBLE '1.0', map(array[1,2,3,4], array['ek','don','teen','char'])," +
+ " CAST(1.0 as DECIMAL(5,2))," +
+ " 'one', VARBINARY 'binary0/1values',\n" +
+ " (CAST(ROW(null, 'this is a random value') AS ROW(int, varchar))), current_date", 1);
+ MaterializedResult result = computeActual("SELECT * from test_nested_table");
+ assertEquals(result.getRowCount(), 1);
+
+ dropTable(session, "test_nested_table");
+
+ @Language("SQL") String createTable2 = "" +
+ "CREATE TABLE test_nested_table (" +
+ " int INTEGER" +
+ ", arr ARRAY(ROW(id INTEGER, vc VARCHAR))" +
+ ", big BIGINT" +
+ ", rl REAL" +
+ ", dbl DOUBLE" +
+ ", mp MAP(INTEGER, ARRAY(VARCHAR))" +
+ ", dec DECIMAL(5,2)" +
+ ", str ROW(id INTEGER, vc VARCHAR, arr ARRAY(INTEGER))" +
+ ", vc VARCHAR)" +
+ " WITH (partitioning = ARRAY['int']," +
+ " format = '" + fileFormat + "'" +
+ ")";
+
+ assertUpdate(session, createTable2);
+
+ assertUpdate(session, "INSERT INTO test_nested_table " +
+ " select 1, array[cast(row(1, null) as row(int, varchar)), cast(row(2, 'dos') as row(int, varchar))], BIGINT '1', REAL '1.0', DOUBLE '1.0', " +
+ "map(array[1,2], array[array['ek', 'one'], array['don', 'do', 'two']]), CAST(1.0 as DECIMAL(5,2)), " +
+ "CAST(ROW(1, 'this is a random value', null) AS ROW(int, varchar, array(int))), 'one'", 1);
+ result = computeActual("SELECT * from test_nested_table");
+ assertEquals(result.getRowCount(), 1);
+
+ @Language("SQL") String createTable3 = "" +
+ "CREATE TABLE test_nested_table2 WITH (partitioning = ARRAY['int']) as select * from test_nested_table";
+
+ assertUpdate(session, createTable3, 1);
+
+ result = computeActual("SELECT * from test_nested_table2");
+ assertEquals(result.getRowCount(), 1);
+
+ dropTable(session, "test_nested_table");
+ dropTable(session, "test_nested_table2");
+ }
}
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java
new file mode 100644
index 0000000000000..9627435fb5645
--- /dev/null
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergSystemTables.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg;
+
+import com.facebook.presto.Session;
+import com.facebook.presto.testing.MaterializedResult;
+import com.facebook.presto.testing.MaterializedRow;
+import com.facebook.presto.testing.QueryRunner;
+import com.facebook.presto.tests.AbstractTestQueryFramework;
+import com.facebook.presto.tests.DistributedQueryRunner;
+import com.google.common.collect.ImmutableMap;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.nio.file.Path;
+import java.time.LocalDate;
+import java.util.Map;
+import java.util.function.Function;
+
+import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
+import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static org.testng.Assert.assertEquals;
+
+public class TestIcebergSystemTables
+ extends AbstractTestQueryFramework
+{
+ private static final int DEFAULT_PRECISION = 5;
+
+ @Override
+ protected QueryRunner createQueryRunner()
+ throws Exception
+ {
+ Session session = testSessionBuilder()
+ .setCatalog(ICEBERG_CATALOG)
+ .build();
+ DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build();
+
+ Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");
+
+ queryRunner.installPlugin(new IcebergPlugin());
+ Map<String, String> icebergProperties = ImmutableMap.<String, String>builder()
+ .put("hive.metastore", "file")
+ .put("hive.metastore.catalog.dir", dataDir.toString() + "/catalog")
+ .build();
+
+ queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
+
+ return queryRunner;
+ }
+
+ @BeforeClass
+ public void setUp()
+ {
+ assertUpdate("CREATE SCHEMA test_schema");
+ assertUpdate("CREATE TABLE test_schema.test_table (_bigint BIGINT, _date DATE) WITH (partitioning = ARRAY['_date'])");
+ assertUpdate("INSERT INTO test_schema.test_table VALUES (0, CAST('2019-09-08' AS DATE)), (1, CAST('2019-09-09' AS DATE)), (2, CAST('2019-09-09' AS DATE))", 3);
+ assertUpdate("INSERT INTO test_schema.test_table VALUES (3, CAST('2019-09-09' AS DATE)), (4, CAST('2019-09-10' AS DATE)), (5, CAST('2019-09-10' AS DATE))", 3);
+ assertQuery("SELECT count(*) FROM test_schema.test_table", "VALUES 6");
+
+ assertUpdate("CREATE TABLE test_schema.test_table_multilevel_partitions (_varchar VARCHAR, _bigint BIGINT, _date DATE) WITH (partitioning = ARRAY['_bigint', '_date'])");
+ assertUpdate("INSERT INTO test_schema.test_table_multilevel_partitions VALUES ('a', 0, CAST('2019-09-08' AS DATE)), ('a', 1, CAST('2019-09-08' AS DATE)), ('a', 0, CAST('2019-09-09' AS DATE))", 3);
+ assertQuery("SELECT count(*) FROM test_schema.test_table_multilevel_partitions", "VALUES 3");
+ }
+
+ @Test
+ public void testPartitionTable()
+ {
+ assertQuery("SELECT count(*) FROM test_schema.test_table", "VALUES 6");
+ assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$partitions\"",
+ "VALUES ('_date', 'date', '', '')," +
+ "('row_count', 'bigint', '', '')," +
+ "('file_count', 'bigint', '', '')," +
+ "('total_size', 'bigint', '', '')," +
+ "('_bigint', 'row(\"min\" bigint, \"max\" bigint, \"null_count\" bigint)', '', '')");
+
+ MaterializedResult result = computeActual("SELECT * from test_schema.\"test_table$partitions\"");
+ assertEquals(result.getRowCount(), 3);
+
+ Map<LocalDate, MaterializedRow> rowsByPartition = result.getMaterializedRows().stream()
+ .collect(toImmutableMap(row -> (LocalDate) row.getField(0), Function.identity()));
+
+ // Test if row counts are computed correctly
+ assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-08")).getField(1), 1L);
+ assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-09")).getField(1), 3L);
+ assertEquals(rowsByPartition.get(LocalDate.parse("2019-09-10")).getField(1), 2L);
+
+ // Test if min/max values and null value count are computed correctly.
+ assertEquals(
+ rowsByPartition.get(LocalDate.parse("2019-09-08")).getField(4),
+ new MaterializedRow(DEFAULT_PRECISION, 0L, 0L, 0L).getFields());
+ assertEquals(
+ rowsByPartition.get(LocalDate.parse("2019-09-09")).getField(4),
+ new MaterializedRow(DEFAULT_PRECISION, 1L, 3L, 0L).getFields());
+ assertEquals(
+ rowsByPartition.get(LocalDate.parse("2019-09-10")).getField(4),
+ new MaterializedRow(DEFAULT_PRECISION, 4L, 5L, 0L).getFields());
+ }
+
+ @Test
+ public void testHistoryTable()
+ {
+ assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$history\"",
+ "VALUES ('made_current_at', 'timestamp with time zone', '', '')," +
+ "('snapshot_id', 'bigint', '', '')," +
+ "('parent_id', 'bigint', '', '')," +
+ "('is_current_ancestor', 'boolean', '', '')");
+
+ // Test the number of history entries
+ assertQuery("SELECT count(*) FROM test_schema.\"test_table$history\"", "VALUES 3");
+ }
+
+ @Test
+ public void testSnapshotsTable()
+ {
+ assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$snapshots\"",
+ "VALUES ('committed_at', 'timestamp with time zone', '', '')," +
+ "('snapshot_id', 'bigint', '', '')," +
+ "('parent_id', 'bigint', '', '')," +
+ "('operation', 'varchar', '', '')," +
+ "('manifest_list', 'varchar', '', '')," +
+ "('summary', 'map(varchar, varchar)', '', '')");
+
+ assertQuery("SELECT operation FROM test_schema.\"test_table$snapshots\"", "VALUES 'append', 'append', 'append'");
+ assertQuery("SELECT summary['total-records'] FROM test_schema.\"test_table$snapshots\"", "VALUES '0', '3', '6'");
+ }
+
+ @Test
+ public void testManifestsTable()
+ {
+ assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$manifests\"",
+ "VALUES ('path', 'varchar', '', '')," +
+ "('length', 'bigint', '', '')," +
+ "('partition_spec_id', 'integer', '', '')," +
+ "('added_snapshot_id', 'bigint', '', '')," +
+ "('added_data_files_count', 'integer', '', '')," +
+ "('existing_data_files_count', 'integer', '', '')," +
+ "('deleted_data_files_count', 'integer', '', '')," +
+ "('partitions', 'array(row(\"contains_null\" boolean, \"lower_bound\" varchar, \"upper_bound\" varchar))', '', '')");
+ assertQuerySucceeds("SELECT * FROM test_schema.\"test_table$manifests\"");
+
+ assertQuerySucceeds("SELECT * FROM test_schema.\"test_table_multilevel_partitions$manifests\"");
+ }
+
+ @Test
+ public void testFilesTable()
+ {
+ assertQuery("SHOW COLUMNS FROM test_schema.\"test_table$files\"",
+ "VALUES ('file_path', 'varchar', '', '')," +
+ "('file_format', 'varchar', '', '')," +
+ "('record_count', 'bigint', '', '')," +
+ "('file_size_in_bytes', 'bigint', '', '')," +
+ "('column_sizes', 'map(integer, bigint)', '', '')," +
+ "('value_counts', 'map(integer, bigint)', '', '')," +
+ "('null_value_counts', 'map(integer, bigint)', '', '')," +
+ "('lower_bounds', 'map(integer, varchar)', '', '')," +
+ "('upper_bounds', 'map(integer, varchar)', '', '')," +
+ "('key_metadata', 'varbinary', '', '')," +
+ "('split_offsets', 'array(bigint)', '', '')");
+ assertQuerySucceeds("SELECT * FROM test_schema.\"test_table$files\"");
+ }
+
+ @AfterClass(alwaysRun = true)
+ public void tearDown()
+ {
+ assertUpdate("DROP TABLE IF EXISTS test_schema.test_table");
+ assertUpdate("DROP TABLE IF EXISTS test_schema.test_table_multilevel_partitions");
+ assertUpdate("DROP SCHEMA IF EXISTS test_schema");
+ }
+}
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java
index 8fa45ce800b96..b69bfb18decb4 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcWriter.java
@@ -144,6 +144,9 @@ public class OrcWriter
private long previouslyRecordedSizeInBytes;
private boolean closed;
+ private long numberOfRows;
+ private List<ColumnStatistics> unencryptedStats;
+
@Nullable
private final OrcWriteValidation.OrcWriteValidationBuilder validationBuilder;
@@ -162,6 +165,41 @@ public OrcWriter(
boolean validate,
OrcWriteValidationMode validationMode,
WriterStats stats)
+ {
+ this(
+ dataSink,
+ columnNames,
+ types,
+ Optional.empty(),
+ orcEncoding,
+ compressionKind,
+ encryption,
+ dwrfEncryptionProvider,
+ options,
+ dwrfOptions,
+ userMetadata,
+ hiveStorageTimeZone,
+ validate,
+ validationMode,
+ stats);
+ }
+
+ public OrcWriter(
+ DataSink dataSink,
+ List<String> columnNames,
+ List<Type> types,
+ Optional<List<OrcType>> inputOrcTypes,
+ OrcEncoding orcEncoding,
+ CompressionKind compressionKind,
+ Optional encryption,
+ DwrfEncryptionProvider dwrfEncryptionProvider,
+ OrcWriterOptions options,
+ Optional dwrfOptions,
+ Map<String, String> userMetadata,
+ DateTimeZone hiveStorageTimeZone,
+ boolean validate,
+ OrcWriteValidationMode validationMode,
+ WriterStats stats)
{
this.validationBuilder = validate ? new OrcWriteValidation.OrcWriteValidationBuilder(validationMode, types).setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null;
@@ -199,7 +237,8 @@ public OrcWriter(
this.stats = requireNonNull(stats, "stats is null");
requireNonNull(columnNames, "columnNames is null");
- this.orcTypes = OrcType.createOrcRowType(0, columnNames, types);
+ requireNonNull(inputOrcTypes, "inputOrcTypes is null");
+ this.orcTypes = inputOrcTypes.orElseGet(() -> OrcType.createOrcRowType(0, columnNames, types));
recordValidation(validation -> validation.setColumnNames(columnNames));
dwrfWriterEncryption = requireNonNull(encryption, "encryption is null");
@@ -646,7 +685,7 @@ private List bufferFileFooter()
Slice metadataSlice = metadataWriter.writeMetadata(metadata);
outputData.add(createDataOutput(metadataSlice));
- long numberOfRows = closedStripes.stream()
+ numberOfRows = closedStripes.stream()
.mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows())
.sum();
@@ -660,7 +699,7 @@ private List bufferFileFooter()
Map userMetadata = this.userMetadata.entrySet().stream()
.collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));
- List unencryptedStats = new ArrayList<>();
+ unencryptedStats = new ArrayList<>();
Map> encryptedStats = new HashMap<>();
addStatsRecursive(fileStats, 0, new HashMap<>(), unencryptedStats, encryptedStats);
Optional dwrfEncryption;
@@ -807,6 +846,18 @@ public void validate(OrcDataSource input)
DwrfKeyProvider.of(intermediateKeyMetadata.build()));
}
+ public long getFileRowCount()
+ {
+ checkState(closed, "File row count is not available until the writing has finished");
+ return numberOfRows;
+ }
+
+ public List<ColumnStatistics> getFileStats()
+ {
+ checkState(closed, "File statistics are not available until the writing has finished");
+ return unencryptedStats;
+ }
+
private static List toDenseList(Map data, int expectedSize)
{
checkArgument(data.size() == expectedSize);
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java
index ababc8c4bee5a..1482ec5077531 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataReader.java
@@ -492,7 +492,7 @@ private static OrcType toType(OrcProto.Type type)
precision = Optional.of(type.getPrecision());
scale = Optional.of(type.getScale());
}
- return new OrcType(toTypeKind(type.getKind()), type.getSubtypesList(), type.getFieldNamesList(), length, precision, scale);
+ return new OrcType(toTypeKind(type.getKind()), type.getSubtypesList(), type.getFieldNamesList(), length, precision, scale, toMap(type.getAttributesList()));
}
private static List toType(List types)
@@ -546,6 +546,20 @@ private static OrcTypeKind toTypeKind(OrcProto.Type.Kind typeKind)
}
}
+ // This method assumes type attributes have no duplicate key
+ private static Map<String, String> toMap(List<OrcProto.StringPair> attributes)
+ {
+ ImmutableMap.Builder<String, String> results = new ImmutableMap.Builder<>();
+ if (attributes != null) {
+ for (OrcProto.StringPair attribute : attributes) {
+ if (attribute.hasKey() && attribute.hasValue()) {
+ results.put(attribute.getKey(), attribute.getValue());
+ }
+ }
+ }
+ return results.build();
+ }
+
private static StreamKind toStreamKind(OrcProto.Stream.Kind streamKind)
{
switch (streamKind) {
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java
index f14b5d221132e..c519df4d36726 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcMetadataWriter.java
@@ -33,10 +33,12 @@
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.lang.Math.toIntExact;
import static java.util.stream.Collectors.toList;
@@ -145,7 +147,8 @@ private static Type toType(OrcType type)
Builder builder = Type.newBuilder()
.setKind(toTypeKind(type.getOrcTypeKind()))
.addAllSubtypes(type.getFieldTypeIndexes())
- .addAllFieldNames(type.getFieldNames());
+ .addAllFieldNames(type.getFieldNames())
+ .addAllAttributes(toStringPairList(type.getAttributes()));
if (type.getLength().isPresent()) {
builder.setMaximumLength(type.getLength().get());
@@ -202,6 +205,16 @@ private static OrcProto.Type.Kind toTypeKind(OrcTypeKind orcTypeKind)
throw new IllegalArgumentException("Unsupported type: " + orcTypeKind);
}
+ private static List<OrcProto.StringPair> toStringPairList(Map<String, String> attributes)
+ {
+ return attributes.entrySet().stream()
+ .map(entry -> OrcProto.StringPair.newBuilder()
+ .setKey(entry.getKey())
+ .setValue(entry.getValue())
+ .build())
+ .collect(toImmutableList());
+ }
+
private static OrcProto.ColumnStatistics toColumnStatistics(ColumnStatistics columnStatistics)
{
OrcProto.ColumnStatistics.Builder builder = OrcProto.ColumnStatistics.newBuilder();
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java
index 097cb2d3ebb42..94b57418afe68 100644
--- a/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java
+++ b/presto-orc/src/main/java/com/facebook/presto/orc/metadata/OrcType.java
@@ -20,9 +20,11 @@
import com.facebook.presto.common.type.TypeSignatureParameter;
import com.facebook.presto.common.type.VarcharType;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import static com.facebook.presto.common.type.BigintType.BIGINT;
@@ -79,6 +81,7 @@ public enum OrcTypeKind
private final Optional length;
private final Optional precision;
private final Optional scale;
+ private final Map<String, String> attributes;
private OrcType(OrcTypeKind orcTypeKind)
{
@@ -101,6 +104,11 @@ private OrcType(OrcTypeKind orcTypeKind, List fieldTypeIndexes, List fieldTypeIndexes, List fieldNames, Optional length, Optional precision, Optional scale)
+ {
+ this(orcTypeKind, fieldTypeIndexes, fieldNames, length, precision, scale, ImmutableMap.of());
+ }
+
+ public OrcType(OrcTypeKind orcTypeKind, List<Integer> fieldTypeIndexes, List<String> fieldNames, Optional<Integer> length, Optional<Integer> precision, Optional<Integer> scale, Map<String, String> attributes)
{
this.orcTypeKind = requireNonNull(orcTypeKind, "typeKind is null");
this.fieldTypeIndexes = ImmutableList.copyOf(requireNonNull(fieldTypeIndexes, "fieldTypeIndexes is null"));
@@ -114,6 +122,7 @@ public OrcType(OrcTypeKind orcTypeKind, List fieldTypeIndexes, List getScale()
return scale;
}
+ public Map<String, String> getAttributes()
+ {
+ return attributes;
+ }
+
@Override
public String toString()
{
diff --git a/presto-server/pom.xml b/presto-server/pom.xml
index 5f597a7138203..1c67432bc23b6 100644
--- a/presto-server/pom.xml
+++ b/presto-server/pom.xml
@@ -307,6 +307,14 @@
zip
provided
+
+
+ com.facebook.presto
+ presto-iceberg
+ ${project.version}
+ zip
+ provided
+
diff --git a/presto-server/src/main/assembly/presto.xml b/presto-server/src/main/assembly/presto.xml
index f135ceff45dfc..563ee2ed2c061 100644
--- a/presto-server/src/main/assembly/presto.xml
+++ b/presto-server/src/main/assembly/presto.xml
@@ -184,5 +184,9 @@
${project.build.directory}/dependency/presto-bigquery-${project.version}
plugin/presto-bigquery
+
+ ${project.build.directory}/dependency/presto-iceberg-${project.version}
+ plugin/iceberg
+