-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Support DML operations on Delta tables with name column mapping
#15837
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,19 +17,22 @@ | |
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import io.trino.plugin.hive.HiveColumnHandle; | ||
| import io.trino.spi.connector.ColumnHandle; | ||
| import io.trino.spi.type.TimestampWithTimeZoneType; | ||
| import io.trino.spi.type.Type; | ||
| import org.openjdk.jol.info.ClassLayout; | ||
|
|
||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.OptionalInt; | ||
|
|
||
| import static com.google.common.base.Verify.verify; | ||
| import static io.airlift.slice.SizeOf.estimatedSizeOf; | ||
| import static io.trino.plugin.deltalake.DeltaHiveTypeTranslator.toHiveType; | ||
| import static io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED; | ||
| import static io.trino.spi.type.BigintType.BIGINT; | ||
| import static io.trino.spi.type.RowType.field; | ||
| import static io.trino.spi.type.RowType.rowType; | ||
| import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; | ||
| import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; | ||
| import static io.trino.spi.type.VarcharType.VARCHAR; | ||
| import static java.lang.Math.toIntExact; | ||
|
|
@@ -184,4 +187,14 @@ public static DeltaLakeColumnHandle fileModifiedTimeColumnHandle() | |
| { | ||
| return new DeltaLakeColumnHandle(FILE_MODIFIED_TIME_COLUMN_NAME, FILE_MODIFIED_TIME_TYPE, OptionalInt.empty(), FILE_MODIFIED_TIME_COLUMN_NAME, FILE_MODIFIED_TIME_TYPE, SYNTHESIZED); | ||
| } | ||
|
|
||
| /** Returns the type used when writing this column: starts from getPhysicalType() and substitutes TIMESTAMP_MILLIS when that type is a TimestampWithTimeZoneType; any other type is returned unchanged. createParquetFileWriter maps this method over its data columns to build the Parquet type list. */ public Type getSupportedType() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needs an
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd also call this |
||
| { | ||
| Type supportedType = getPhysicalType(); | ||
| if (supportedType instanceof TimestampWithTimeZoneType timestamp) { | ||
| /* only precision 3 (milliseconds) is accepted; verify() fails for any other precision */ verify(timestamp.getPrecision() == 3, "Unsupported type: %s", supportedType); | ||
| /* the time-zone-aware type is replaced by the plain millisecond timestamp type */ supportedType = TIMESTAMP_MILLIS; | ||
| } | ||
| return supportedType; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -39,7 +39,6 @@ | |||||||||||||||||
| import io.trino.spi.connector.ConnectorPageSource; | ||||||||||||||||||
| import io.trino.spi.connector.ConnectorSession; | ||||||||||||||||||
| import io.trino.spi.predicate.TupleDomain; | ||||||||||||||||||
| import io.trino.spi.type.TimestampWithTimeZoneType; | ||||||||||||||||||
| import io.trino.spi.type.Type; | ||||||||||||||||||
| import org.apache.hadoop.fs.Path; | ||||||||||||||||||
| import org.apache.parquet.hadoop.metadata.CompressionCodecName; | ||||||||||||||||||
|
|
@@ -61,7 +60,6 @@ | |||||||||||||||||
| import java.util.function.Supplier; | ||||||||||||||||||
| import java.util.stream.IntStream; | ||||||||||||||||||
|
|
||||||||||||||||||
| import static com.google.common.base.Verify.verify; | ||||||||||||||||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | ||||||||||||||||||
| import static io.airlift.json.JsonCodec.listJsonCodec; | ||||||||||||||||||
| import static io.airlift.slice.Slices.utf8Slice; | ||||||||||||||||||
|
|
@@ -76,7 +74,6 @@ | |||||||||||||||||
| import static io.trino.spi.block.ColumnarRow.toColumnarRow; | ||||||||||||||||||
| import static io.trino.spi.predicate.Utils.nativeValueToBlock; | ||||||||||||||||||
| import static io.trino.spi.type.BigintType.BIGINT; | ||||||||||||||||||
| import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; | ||||||||||||||||||
| import static io.trino.spi.type.TinyintType.TINYINT; | ||||||||||||||||||
| import static io.trino.spi.type.VarcharType.VARCHAR; | ||||||||||||||||||
| import static java.lang.Math.toIntExact; | ||||||||||||||||||
|
|
@@ -355,20 +352,9 @@ private FileWriter createParquetFileWriter(String path, List<DeltaLakeColumnHand | |||||||||||||||||
| try { | ||||||||||||||||||
| Closeable rollbackAction = () -> fileSystem.deleteFile(path); | ||||||||||||||||||
|
|
||||||||||||||||||
| List<Type> parquetTypes = dataColumns.stream() | ||||||||||||||||||
| .map(column -> { | ||||||||||||||||||
| Type type = column.getType(); | ||||||||||||||||||
| if (type instanceof TimestampWithTimeZoneType timestamp) { | ||||||||||||||||||
| verify(timestamp.getPrecision() == 3, "Unsupported type: %s", type); | ||||||||||||||||||
| return TIMESTAMP_MILLIS; | ||||||||||||||||||
| } | ||||||||||||||||||
| return type; | ||||||||||||||||||
| }) | ||||||||||||||||||
| .collect(toImmutableList()); | ||||||||||||||||||
|
|
||||||||||||||||||
| List<String> dataColumnNames = dataColumns.stream() | ||||||||||||||||||
| .map(DeltaLakeColumnHandle::getName) | ||||||||||||||||||
| .collect(toImmutableList()); | ||||||||||||||||||
| List<String> dataColumnNames = dataColumns.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList()); | ||||||||||||||||||
| List<Type> parquetTypes = dataColumns.stream().map(DeltaLakeColumnHandle::getSupportedType).collect(toImmutableList()); | ||||||||||||||||||
|
||||||||||||||||||
| List<String> dataColumnNames = dataColumns.stream().map(DeltaLakeColumnHandle::getPhysicalName).collect(toImmutableList()); | |
| List<Type> parquetTypes = dataColumns.stream().map(DeltaLakeColumnHandle::getSupportedType).collect(toImmutableList()); | |
| ImmutableList.Builder<String> dataColumnNames = ImmutableList.builder(); | |
| ImmutableList.Builder<Type> parquetTypes = ImmutableList.builder(); | |
| for (DeltaLakeColumnHandle column : dataColumns) { | |
| dataColumnNames.add(..); | |
| parquetTypes.add(...); | |
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |
| import io.trino.plugin.deltalake.transactionlog.AddFileEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.CdfFileEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.CommitInfoEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode; | ||
| import io.trino.plugin.deltalake.transactionlog.MetadataEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format; | ||
| import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; | ||
|
|
@@ -155,6 +156,7 @@ | |
| import static com.google.common.collect.ImmutableList.toImmutableList; | ||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | ||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | ||
| import static com.google.common.collect.MoreCollectors.onlyElement; | ||
| import static com.google.common.collect.MoreCollectors.toOptional; | ||
| import static io.trino.plugin.deltalake.DataFileInfo.DataFileType.DATA; | ||
| import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME; | ||
|
|
@@ -277,6 +279,7 @@ public class DeltaLakeMetadata | |
| private static final int WRITER_VERSION = 2; | ||
| // The highest writer version Trino supports writing to | ||
| private static final int MAX_WRITER_VERSION = 4; | ||
mx123 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| private static final int MAX_DML_WRITER_VERSION = 5; | ||
| // This constant should be used only for a new table | ||
| private static final ProtocolEntry DEFAULT_PROTOCOL = new ProtocolEntry(READER_VERSION, WRITER_VERSION); | ||
| // Matches the dummy column Databricks stores in the metastore | ||
|
|
@@ -1297,7 +1300,8 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto | |
| throw new TrinoException(NOT_SUPPORTED, "Inserts are not supported for tables with delta invariants"); | ||
| } | ||
| checkUnsupportedGeneratedColumns(table.getMetadataEntry()); | ||
| checkSupportedWriterVersion(session, table.getSchemaTableName()); | ||
| checkUnsupportedColumnMapping(table.getMetadataEntry()); | ||
| checkSupportedDmlWriterVersion(session, table); | ||
|
|
||
| List<DeltaLakeColumnHandle> inputColumns = columns.stream() | ||
| .map(handle -> (DeltaLakeColumnHandle) handle) | ||
|
|
@@ -1391,8 +1395,7 @@ public Optional<ConnectorOutputMetadata> finishInsert( | |
| ISOLATION_LEVEL, | ||
| true)); | ||
|
|
||
| // Note: during writes we want to preserve original case of partition columns | ||
| List<String> partitionColumns = handle.getMetadataEntry().getOriginalPartitionColumns(); | ||
| List<String> partitionColumns = getWritePartitionColumnNames(handle.getMetadataEntry().getOriginalPartitionColumns(), handle.getInputColumns()); | ||
| appendAddFileEntries(transactionLogWriter, dataFileInfos, partitionColumns, true); | ||
|
|
||
| transactionLogWriter.flush(); | ||
|
|
@@ -1410,6 +1413,22 @@ public Optional<ConnectorOutputMetadata> finishInsert( | |
| return Optional.empty(); | ||
| } | ||
|
|
||
| /** Computes the partition column names that finishInsert and finishMerge pass to appendAddFileEntries. Each entry of originalPartitionColumns is matched against dataColumns by case-insensitive comparison with the handle's getName(); the result keeps the order of originalPartitionColumns. */ private static List<String> getWritePartitionColumnNames(List<String> originalPartitionColumns, List<DeltaLakeColumnHandle> dataColumns) | ||
| { | ||
| return originalPartitionColumns.stream() | ||
| .map(columnName -> { | ||
| DeltaLakeColumnHandle dataColumn = dataColumns.stream() | ||
| .filter(column -> columnName.equalsIgnoreCase(column.getName())) | ||
| /* onlyElement() requires exactly one matching handle; zero or several matches make it throw */ .collect(onlyElement()); | ||
|
||
| // Note: during writes we want to preserve the original case of a partition column when its name does not differ from the column's physical name (ignoring case) | ||
|
||
| if (dataColumn.getPhysicalName().equalsIgnoreCase(columnName)) { | ||
|
||
| return columnName; | ||
| } | ||
| /* the physical name differs from the logical name, so the physical name is used instead */ return dataColumn.getPhysicalName(); | ||
| }) | ||
| .collect(toImmutableList()); | ||
| } | ||
|
|
||
| @Override | ||
| public RowChangeParadigm getRowChangeParadigm(ConnectorSession session, ConnectorTableHandle tableHandle) | ||
| { | ||
|
|
@@ -1449,7 +1468,8 @@ public ConnectorMergeTableHandle beginMerge(ConnectorSession session, ConnectorT | |
| throw new TrinoException(NOT_SUPPORTED, "Writing to tables with CHECK constraints is not supported"); | ||
| } | ||
| checkUnsupportedGeneratedColumns(handle.getMetadataEntry()); | ||
| checkSupportedWriterVersion(session, handle.getSchemaTableName()); | ||
| checkUnsupportedColumnMapping(handle.getMetadataEntry()); | ||
| checkSupportedDmlWriterVersion(session, handle); | ||
|
|
||
| ConnectorTableMetadata tableMetadata = getTableMetadata(session, handle); | ||
|
|
||
|
|
@@ -1536,7 +1556,9 @@ public void finishMerge(ConnectorSession session, ConnectorMergeTableHandle tabl | |
| transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(file, writeTimestamp, true)); | ||
| } | ||
|
|
||
| List<String> partitionColumns = handle.getMetadataEntry().getOriginalPartitionColumns(); | ||
| List<String> partitionColumns = getWritePartitionColumnNames( | ||
| handle.getMetadataEntry().getOriginalPartitionColumns(), | ||
| ((DeltaLakeMergeTableHandle) tableHandle).getInsertTableHandle().getInputColumns()); | ||
| appendAddFileEntries(transactionLogWriter, newFiles, partitionColumns, true); | ||
|
|
||
| transactionLogWriter.flush(); | ||
|
|
@@ -1787,6 +1809,25 @@ private void checkUnsupportedGeneratedColumns(MetadataEntry metadataEntry) | |
| } | ||
| } | ||
|
|
||
| /** Guard called from beginInsert and beginMerge: throws a NOT_SUPPORTED TrinoException unless the column mapping mode derived from metadataEntry is NONE or NAME. */ private void checkUnsupportedColumnMapping(MetadataEntry metadataEntry) | ||
| { | ||
| ColumnMappingMode columnMappingMode = getColumnMappingMode(metadataEntry); | ||
| /* equivalent to: mode is neither NONE nor NAME */ if (!(columnMappingMode == ColumnMappingMode.NONE || columnMappingMode == ColumnMappingMode.NAME)) { | ||
| /* NOTE(review): the message names only "id", but this branch is taken for every mode other than NONE and NAME; confirm whether ColumnMappingMode has further values */ throw new TrinoException(NOT_SUPPORTED, "Writing with column mapping id is not supported"); | ||
| } | ||
| } | ||
|
|
||
| /** Writer-version check used by beginInsert and beginMerge. A table whose minimum writer version equals MAX_DML_WRITER_VERSION is accepted when its column mapping mode is NONE or NAME; every other case is delegated to checkSupportedWriterVersion. */ private void checkSupportedDmlWriterVersion(ConnectorSession session, DeltaLakeTableHandle table) | ||
| { | ||
| SchemaTableName schemaTableName = table.getSchemaTableName(); | ||
| int requiredWriterVersion = getProtocolEntry(session, schemaTableName).getMinWriterVersion(); | ||
| ColumnMappingMode columnMappingMode = getColumnMappingMode(table.getMetadataEntry()); | ||
| /* exact match on MAX_DML_WRITER_VERSION only; other versions fall through to the general check below */ if (requiredWriterVersion == MAX_DML_WRITER_VERSION && (columnMappingMode == ColumnMappingMode.NONE || columnMappingMode == ColumnMappingMode.NAME)) { | ||
| return; | ||
| } | ||
| /* NOTE(review): checkSupportedWriterVersion calls getProtocolEntry(session, schemaTableName) again for the same table */ checkSupportedWriterVersion(session, schemaTableName); | ||
| } | ||
|
|
||
| private void checkSupportedWriterVersion(ConnectorSession session, SchemaTableName schemaTableName) | ||
| { | ||
| int requiredWriterVersion = getProtocolEntry(session, schemaTableName).getMinWriterVersion(); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.