Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import io.trino.spi.block.Block;
import io.trino.spi.connector.ConnectorPageSink;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
Expand All @@ -63,7 +62,6 @@
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getParquetWriterPageSize;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogAccess.canonicalizeColumnName;
import static io.trino.plugin.hive.util.HiveUtil.escapePathName;
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
Expand Down Expand Up @@ -159,8 +157,8 @@ public AbstractDeltaLakePageSink(
case REGULAR:
dataColumnHandles.add(column);
dataColumnsInputIndex.add(inputIndex);
dataColumnNames.add(column.getName());
dataColumnTypes.add(column.getType());
dataColumnNames.add(column.getPhysicalName());
dataColumnTypes.add(column.getSupportedType());
break;
case SYNTHESIZED:
processSynthesizedColumn(column);
Expand Down Expand Up @@ -457,28 +455,18 @@ private FileWriter createParquetFileWriter(String path)
try {
Closeable rollbackAction = () -> fileSystem.deleteFile(path);

List<Type> parquetTypes = dataColumnTypes.stream()
.map(type -> {
if (type instanceof TimestampWithTimeZoneType) {
verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
return TIMESTAMP_MILLIS;
}
return type;
})
.collect(toImmutableList());

// we use identity column mapping; input page already contains only data columns per
// DeltaLakePageSink.getDataPage()
int[] identityMapping = new int[dataColumnTypes.size()];
for (int i = 0; i < identityMapping.length; ++i) {
identityMapping[i] = i;
}

ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames, false, false);
ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(dataColumnTypes, dataColumnNames, false, false);
return new ParquetFileWriter(
fileSystem.newOutputFile(path),
rollbackAction,
parquetTypes,
dataColumnTypes,
dataColumnNames,
schemaConverter.getMessageType(),
schemaConverter.getPrimitiveTypes(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,22 @@
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import org.openjdk.jol.info.ClassLayout;

import java.util.Objects;
import java.util.Optional;
import java.util.OptionalInt;

import static com.google.common.base.Verify.verify;
import static io.airlift.slice.SizeOf.estimatedSizeOf;
import static io.trino.plugin.deltalake.DeltaHiveTypeTranslator.toHiveType;
import static io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.RowType.field;
import static io.trino.spi.type.RowType.rowType;
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.Math.toIntExact;
Expand Down Expand Up @@ -184,4 +187,14 @@ public static DeltaLakeColumnHandle fileModifiedTimeColumnHandle()
{
return new DeltaLakeColumnHandle(FILE_MODIFIED_TIME_COLUMN_NAME, FILE_MODIFIED_TIME_TYPE, OptionalInt.empty(), FILE_MODIFIED_TIME_COLUMN_NAME, FILE_MODIFIED_TIME_TYPE, SYNTHESIZED);
}

public Type getSupportedType()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs an @JsonIgnore annotation

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also call this getPhysicalType

{
Type supportedType = getPhysicalType();
if (supportedType instanceof TimestampWithTimeZoneType timestamp) {
verify(timestamp.getPrecision() == 3, "Unsupported type: %s", supportedType);
supportedType = TIMESTAMP_MILLIS;
}
return supportedType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
Expand All @@ -61,7 +60,6 @@
import java.util.function.Supplier;
import java.util.stream.IntStream;

import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.json.JsonCodec.listJsonCodec;
import static io.airlift.slice.Slices.utf8Slice;
Expand All @@ -76,7 +74,6 @@
import static io.trino.spi.block.ColumnarRow.toColumnarRow;
import static io.trino.spi.predicate.Utils.nativeValueToBlock;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static io.trino.spi.type.TinyintType.TINYINT;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static java.lang.Math.toIntExact;
Expand Down Expand Up @@ -355,20 +352,9 @@ private FileWriter createParquetFileWriter(String path, List<DeltaLakeColumnHand
try {
Closeable rollbackAction = () -> fileSystem.deleteFile(path);

List<Type> parquetTypes = dataColumns.stream()
.map(column -> {
Type type = column.getType();
if (type instanceof TimestampWithTimeZoneType timestamp) {
verify(timestamp.getPrecision() == 3, "Unsupported type: %s", type);
return TIMESTAMP_MILLIS;
}
return type;
})
.collect(toImmutableList());

List<String> dataColumnNames = dataColumns.stream()
.map(DeltaLakeColumnHandle::getName)
.collect(toImmutableList());
List<String> dataColumnNames = dataColumns.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList());
List<Type> parquetTypes = dataColumns.stream().map(DeltaLakeColumnHandle::getSupportedType).collect(toImmutableList());
Comment on lines 355 to 356
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to avoid iterating over the column list twice

Suggested change
List<String> dataColumnNames = dataColumns.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList());
List<Type> parquetTypes = dataColumns.stream().map(DeltaLakeColumnHandle::getSupportedType).collect(toImmutableList());
ImmutableList.Builder<String> dataColumnNames = ImmutableList.builder();
ImmutableList.Builder<Type> parquetTypes = ImmutableList.builder();
for (DeltaLakeColumnHandle column : dataColumns) {
dataColumnNames.add(..);
parquetTypes.add(...);
}


ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(
parquetTypes,
dataColumnNames,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import io.trino.plugin.deltalake.transactionlog.AddFileEntry;
import io.trino.plugin.deltalake.transactionlog.CdfFileEntry;
import io.trino.plugin.deltalake.transactionlog.CommitInfoEntry;
import io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode;
import io.trino.plugin.deltalake.transactionlog.MetadataEntry;
import io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format;
import io.trino.plugin.deltalake.transactionlog.ProtocolEntry;
import java.util.Map;
import java.util.TreeMap;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.MoreCollectors.onlyElement;
import static com.google.common.collect.MoreCollectors.toOptional;
import static io.trino.plugin.deltalake.DataFileInfo.DataFileType.DATA;
import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME;
Expand Down Expand Up @@ -277,6 +279,7 @@ public class DeltaLakeMetadata
private static final int WRITER_VERSION = 2;
// The highest writer version Trino supports writing to
private static final int MAX_WRITER_VERSION = 4;
private static final int MAX_DML_WRITER_VERSION = 5;
// This constant should be used only for a new table
private static final ProtocolEntry DEFAULT_PROTOCOL = new ProtocolEntry(READER_VERSION, WRITER_VERSION);
// Matches the dummy column Databricks stores in the metastore
Expand Down Expand Up @@ -1297,7 +1300,8 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto
throw new TrinoException(NOT_SUPPORTED, "Inserts are not supported for tables with delta invariants");
}
checkUnsupportedGeneratedColumns(table.getMetadataEntry());
checkSupportedWriterVersion(session, table.getSchemaTableName());
checkUnsupportedColumnMapping(table.getMetadataEntry());
checkSupportedDmlWriterVersion(session, table);

List<DeltaLakeColumnHandle> inputColumns = columns.stream()
.map(handle -> (DeltaLakeColumnHandle) handle)
Expand Down Expand Up @@ -1391,8 +1395,7 @@ public Optional<ConnectorOutputMetadata> finishInsert(
ISOLATION_LEVEL,
true));

// Note: during writes we want to preserve original case of partition columns
List<String> partitionColumns = handle.getMetadataEntry().getOriginalPartitionColumns();
List<String> partitionColumns = getWritePartitionColumnNames(handle.getMetadataEntry().getOriginalPartitionColumns(), handle.getInputColumns());
appendAddFileEntries(transactionLogWriter, dataFileInfos, partitionColumns, true);

transactionLogWriter.flush();
Expand All @@ -1410,6 +1413,22 @@ public Optional<ConnectorOutputMetadata> finishInsert(
return Optional.empty();
}

private static List<String> getWritePartitionColumnNames(List<String> originalPartitionColumns, List<DeltaLakeColumnHandle> dataColumns)
{
return originalPartitionColumns.stream()
.map(columnName -> {
DeltaLakeColumnHandle dataColumn = dataColumns.stream()
.filter(column -> columnName.equalsIgnoreCase(column.getName()))
.collect(onlyElement());
Comment on lines 1420 to 1422
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is O(n^2) on the column list size, not terrible but not awesome. Can we do better by pre-generating a Map lookup for the list traversal you're doing here?

// Note: during writes we want to preserve original case of partition columns, if the column's name is not differ of column's physical name
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: there is no need for the "Note:" prefix in the statement. The comment implies that this is a developer note.

If we add the column mapping as a parameter for the method, we may not need this comment anymore.
If the column mapping is NONE perform the mapping of the column names as in the original code, otherwise for NAME use physical column name and for ID throw illegal argument exception.

if (dataColumn.getPhysicalName().equalsIgnoreCase(columnName)) {
Copy link
Copy Markdown
Contributor

@findinpath findinpath Feb 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this apply only for column mapping NONE ?
If yes, please specify the column mapping as a parameter for the method and use it in the if statement.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, if the behavior is very different for name mapping I'd rewrite this method to only do the name mapping and call it getPartitionColumnsForNameMapping

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case of a none mapping you're just returning the input handle.getMetadataEntry().getOriginalPartitionColumns() right?

return columnName;
}
return dataColumn.getPhysicalName();
})
.collect(toImmutableList());
}

@Override
public RowChangeParadigm getRowChangeParadigm(ConnectorSession session, ConnectorTableHandle tableHandle)
{
Expand Down Expand Up @@ -1449,7 +1468,8 @@ public ConnectorMergeTableHandle beginMerge(ConnectorSession session, ConnectorT
throw new TrinoException(NOT_SUPPORTED, "Writing to tables with CHECK constraints is not supported");
}
checkUnsupportedGeneratedColumns(handle.getMetadataEntry());
checkSupportedWriterVersion(session, handle.getSchemaTableName());
checkUnsupportedColumnMapping(handle.getMetadataEntry());
checkSupportedDmlWriterVersion(session, handle);

ConnectorTableMetadata tableMetadata = getTableMetadata(session, handle);

Expand Down Expand Up @@ -1536,7 +1556,9 @@ public void finishMerge(ConnectorSession session, ConnectorMergeTableHandle tabl
transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(file, writeTimestamp, true));
}

List<String> partitionColumns = handle.getMetadataEntry().getOriginalPartitionColumns();
List<String> partitionColumns = getWritePartitionColumnNames(
handle.getMetadataEntry().getOriginalPartitionColumns(),
((DeltaLakeMergeTableHandle) tableHandle).getInsertTableHandle().getInputColumns());
appendAddFileEntries(transactionLogWriter, newFiles, partitionColumns, true);

transactionLogWriter.flush();
Expand Down Expand Up @@ -1787,6 +1809,25 @@ private void checkUnsupportedGeneratedColumns(MetadataEntry metadataEntry)
}
}

// Writes are supported only for the 'none' and 'name' column mapping modes;
// any other mode (e.g. 'id') is rejected.
private void checkUnsupportedColumnMapping(MetadataEntry metadataEntry)
{
    ColumnMappingMode mappingMode = getColumnMappingMode(metadataEntry);
    boolean writable = mappingMode == ColumnMappingMode.NONE || mappingMode == ColumnMappingMode.NAME;
    if (!writable) {
        throw new TrinoException(NOT_SUPPORTED, "Writing with column mapping id is not supported");
    }
}

// DML is allowed when the table requires exactly MAX_DML_WRITER_VERSION and uses a
// writable column mapping mode ('none' or 'name'); in every other case fall back to
// the generic writer-version check.
private void checkSupportedDmlWriterVersion(ConnectorSession session, DeltaLakeTableHandle table)
{
    SchemaTableName schemaTableName = table.getSchemaTableName();
    int requiredWriterVersion = getProtocolEntry(session, schemaTableName).getMinWriterVersion();
    ColumnMappingMode columnMappingMode = getColumnMappingMode(table.getMetadataEntry());
    boolean writableMapping = columnMappingMode == ColumnMappingMode.NONE || columnMappingMode == ColumnMappingMode.NAME;
    if (requiredWriterVersion != MAX_DML_WRITER_VERSION || !writableMapping) {
        checkSupportedWriterVersion(session, schemaTableName);
    }
}

private void checkSupportedWriterVersion(ConnectorSession session, SchemaTableName schemaTableName)
{
int requiredWriterVersion = getProtocolEntry(session, schemaTableName).getMinWriterVersion();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ public long getRowCount()
public DataFileInfo getDataFileInfo()
throws IOException
{
List<String> dataColumnNames = columnHandles.stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableList());
List<Type> dataColumnTypes = columnHandles.stream().map(DeltaLakeColumnHandle::getType).collect(toImmutableList());
List<String> dataColumnNames = columnHandles.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList());
List<Type> dataColumnTypes = columnHandles.stream().map(DeltaLakeColumnHandle::getSupportedType).collect(toImmutableList());
return new DataFileInfo(
relativeFilePath,
getWrittenBytes(),
Expand Down
Loading