diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java index ec24fc860fd1..4c1e3c4f68ea 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java @@ -16,6 +16,7 @@ import com.google.common.base.Stopwatch; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; import io.airlift.units.DataSize; import io.trino.Session; import io.trino.execution.QueryInfo; @@ -64,7 +65,9 @@ import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.sql.planner.optimizations.PlanNodeSearcher.searchFrom; +import static io.trino.testing.DataProviders.cartesianProduct; import static io.trino.testing.DataProviders.toDataProvider; +import static io.trino.testing.DataProviders.trueFalse; import static io.trino.testing.MaterializedResult.resultBuilder; import static io.trino.testing.QueryAssertions.copyTpchTables; import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.EXECUTE_FUNCTION; @@ -76,6 +79,7 @@ import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; import static io.trino.testing.containers.Minio.MINIO_SECRET_KEY; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -1113,6 +1117,386 @@ private void testCreateTableColumnMappingMode(ColumnMappingMode mode, Consumer>, nested STRUCT) + USING delta + LOCATION 's3://?/databricks-compatibility-test-?' 
+ TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'id' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_id/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_id/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..10d09d099c0f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_id/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1693519271450,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"id\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"96d45bbb-6982-43d2-a82d-09fddc286e8d"}} +{"protocol":{"minReaderVersion":2,"minWriterVersion":5}} 
+{"metaData":{"id":"5d67ce60-ff62-4060-a468-f07166b10784","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-9ec574bd-c2b3-4404-98b3-5c3c892fd00d\"}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"col-aeb0f18a-ca53-4b1c-b77e-96f718f1586c\"}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"col-84140a5c-de0b-4fef-adb1-c7185e00f5cf\"}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"col-60f3d82b-9544-4d4a-aa82-0b4d7586b5c3\"}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"col-bdac20c2-9374-4e8c-a91e-db4015b22398\"}}]},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"col-809ab77c-7ecf-47ff-b610-e273f8061a9a\"}}]}","partitionColumns":[],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"id","delta.columnMapping.maxColumnId":"6","delta.checkpointInterval":"1"},"createdTime":1693519271098}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/README.md new file mode 100644 index 000000000000..4bf8e91209be --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/README.md @@ -0,0 +1,14 @@ +Data generated using OSS Delta Lake 2.4.0: + +```sql +CREATE TABLE default.? + (a_number INT, a_string STRING, array_col ARRAY<STRUCT<array_struct_element: STRING>>, nested STRUCT<field1: STRING>) + USING delta + LOCATION 's3://?/databricks-compatibility-test-?' + TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'name' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..f9c2e0c6e588 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_name/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1693519354203,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"name\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"28cb6685-70da-421a-9b80-5a2a06624c9b"}} +{"protocol":{"minReaderVersion":2,"minWriterVersion":5}} 
+{"metaData":{"id":"a6928233-764e-4b76-8d1e-044a856bf7e7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-890308a4-0e82-43e6-a5e0-6f853db57737\"}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"col-2e92215e-484c-4565-aa4c-7aed83383294\"}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"col-0b74eacc-2dd1-445b-836c-db7296873b26\"}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"col-b51b17b5-63c2-4b6f-92e1-3fcf4f8be41f\"}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"col-c5977c8b-f597-4f6c-9114-2a5d06ba3616\"}}]},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"col-a815bde9-3909-494d-a52c-02b662aef21e\"}}]}","partitionColumns":[],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"name","delta.columnMapping.maxColumnId":"6","delta.checkpointInterval":"1"},"createdTime":1693519354164}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/README.md new file mode 100644 index 000000000000..56089c031b22 --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/README.md @@ -0,0 +1,14 @@ +Data generated using OSS Delta Lake 2.4.0: + +```sql +CREATE TABLE default.? + (a_number INT, a_string STRING, array_col ARRAY<STRUCT<array_struct_element: STRING>>, nested STRUCT<field1: STRING>) + USING delta + LOCATION 's3://?/databricks-compatibility-test-?' + TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'none' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..b63e18b58ec4 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_column_mapping_none/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1692918419268,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"none\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"304ccb52-a1cd-4a1e-b173-8b24fef8b296"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"6133a0a3-3ccc-4784-a5a7-73681a89088d","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"none","delta.checkpointInterval":"1"},"createdTime":1692918418836}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/README.md new file mode 100644 index 000000000000..a2d717ad4b5b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/README.md @@ -0,0 +1,15 @@ +Data generated using OSS Delta Lake 2.4.0: + +```sql +CREATE TABLE default.? + (a_number INT, a_string STRING, array_col ARRAY<STRUCT<array_struct_element: STRING>>, nested STRUCT<field1: STRING>) + USING delta + PARTITIONED BY (a_string) + LOCATION 's3://?/databricks-compatibility-test-?' 
+ TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'id' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..01e9ef4646a3 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_id/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1693519951655,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[\"a_string\"]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"id\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"84f94c35-356e-4cd6-b6bd-2732beca650b"}} +{"protocol":{"minReaderVersion":2,"minWriterVersion":5}} 
+{"metaData":{"id":"758acf5d-d3c8-4aca-96c8-bf13c100be3e","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-58a29de0-b7f2-47de-a2cf-1c3caf0df01b\"}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"col-bd258e61-4f78-4401-85ff-4e2230292a45\"}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"col-c3ba9608-1372-4c21-ab41-b46ccc16798c\"}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"col-e0fe1af0-5508-4878-b8ff-35754833ffd3\"}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"col-e6e138b7-6a3d-4387-9e0b-71b67ea0c921\"}}]},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"col-e920df12-fc88-4b98-be5a-8915a220b9f0\"}}]}","partitionColumns":["a_string"],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"id","delta.columnMapping.maxColumnId":"6","delta.checkpointInterval":"1"},"createdTime":1693519951528}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/README.md new file mode 100644 index 000000000000..66d00430e53d 
--- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/README.md @@ -0,0 +1,15 @@ +Data generated using OSS Delta Lake 2.4.0: + +```sql +CREATE TABLE default.? + (a_number INT, a_string STRING, array_col ARRAY<STRUCT<array_struct_element: STRING>>, nested STRUCT<field1: STRING>) + USING delta + PARTITIONED BY (a_string) + LOCATION 's3://?/databricks-compatibility-test-?' + TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'name' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..e3524a0c454b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_name/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1693520067025,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[\"a_string\"]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"name\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"1157c365-871e-46e8-9905-4b73c922ffa1"}} +{"protocol":{"minReaderVersion":2,"minWriterVersion":5}} 
+{"metaData":{"id":"c5e86f5f-3640-46d9-a8fe-a66b7209d6bf","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-94069d81-20a5-4298-a36d-4e2a8832fe75\"}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"col-64016fbf-79dd-4950-932e-790b4009795a\"}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":4,\"delta.columnMapping.physicalName\":\"col-fb5808bc-196d-4d3d-8269-3e3706531671\"}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":3,\"delta.columnMapping.physicalName\":\"col-423133f1-1d1e-427a-ae7f-547f1ba05ff8\"}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":6,\"delta.columnMapping.physicalName\":\"col-fcecca1c-fa2e-4f30-b580-7f49f7fc9af8\"}}]},\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":5,\"delta.columnMapping.physicalName\":\"col-dec7441b-2164-4fa3-ba6d-104aa480231e\"}}]}","partitionColumns":["a_string"],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"name","delta.columnMapping.maxColumnId":"6","delta.checkpointInterval":"1"},"createdTime":1693520066854}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/README.md new file mode 100644 index 
000000000000..006af667c8a2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/README.md @@ -0,0 +1,15 @@ +Data generated using OSS Delta Lake 2.4.0: + +```sql +CREATE TABLE default.? + (a_number INT, a_string STRING, array_col ARRAY<STRUCT<array_struct_element: STRING>>, nested STRUCT<field1: STRING>) + USING delta + PARTITIONED BY (a_string) + LOCATION 's3://?/databricks-compatibility-test-?' + TBLPROPERTIES ( + 'delta.checkpointInterval' = 1, + 'delta.checkpoint.writeStatsAsJson' = ?, + 'delta.checkpoint.writeStatsAsStruct' = ?, + 'delta.columnMapping.mode' = 'none' +) +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..0aee0f5b3b5c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/write_stats_as_json_partition_column_mapping_none/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1692881426323,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[\"a_string\"]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"%WRITE_STATS_AS_STRUCT%\",\"delta.checkpoint.writeStatsAsJson\":\"%WRITE_STATS_AS_JSON%\",\"delta.columnMapping.mode\":\"none\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"5e2013a2-9c64-4bd9-b57c-1b6219204f55"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"6ddd355f-e321-455d-97c1-2bddf8c4af01","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a_number\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"array_col\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"array_struct_element\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"nested\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"field1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["a_string"],"configuration":{"delta.checkpoint.writeStatsAsStruct":"%WRITE_STATS_AS_STRUCT%","delta.checkpoint.writeStatsAsJson":"%WRITE_STATS_AS_JSON%","delta.columnMapping.mode":"none","delta.checkpointInterval":"1"},"createdTime":1692881425840}} diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeColumnMappingMode.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeColumnMappingMode.java index ee077b682f8b..819ad17000a2 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeColumnMappingMode.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeColumnMappingMode.java @@ -14,24 +14,17 @@ package io.trino.tests.product.deltalake; import com.google.common.collect.ImmutableList; -import io.trino.tempto.assertions.QueryAssert; import io.trino.tempto.assertions.QueryAssert.Row; -import io.trino.tempto.query.QueryResult; import io.trino.testng.services.Flaky; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.math.BigDecimal; -import java.time.LocalDate; import java.util.List; import 
java.util.function.BiConsumer; import java.util.function.Consumer; -import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.tempto.assertions.QueryAssert.Row.row; import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; -import static io.trino.testing.DataProviders.cartesianProduct; -import static io.trino.testing.DataProviders.trueFalse; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS; import static io.trino.tests.product.TestGroups.DELTA_LAKE_EXCLUDE_104; @@ -209,169 +202,6 @@ private void testColumnMappingMode(Consumer createTable) } } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, - PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingWithTrueAndFalseDataProvider") - @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) - public void testTrinoColumnMappingModeAllDataTypes(String mode, boolean partitioned) - { - testColumnMappingModeAllDataTypes(tableName -> onTrino().executeQuery("" + - "CREATE TABLE delta.default." + tableName + " (" + - " a_boolean BOOLEAN," + - " a_tinyint TINYINT," + - " a_smallint SMALLINT," + - " a_int INT," + - " a_bigint BIGINT," + - " a_decimal_5_2 DECIMAL(5,2)," + - " a_decimal_21_3 DECIMAL(21,3)," + - " a_double DOUBLE," + - " a_float REAL," + - " a_string VARCHAR," + - " a_date DATE," + - " a_timestamp TIMESTAMP(3) WITH TIME ZONE," + - " a_binary VARBINARY," + - " a_string_array ARRAY(VARCHAR)," + - " a_struct_array ARRAY(ROW(a_string VARCHAR))," + - " a_map MAP(VARCHAR, VARCHAR)," + - " a_complex_map MAP(VARCHAR, ROW(a_string VARCHAR))," + - " a_struct ROW(a_string VARCHAR, a_int INT)," + - " a_complex_struct ROW(nested_struct ROW(a_string VARCHAR), a_int INT)" + - (partitioned ? ", part VARCHAR" : "") + - ")" + - "WITH (" + - (partitioned ? 
" partitioned_by = ARRAY['part']," : "") + - "location = 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'," + - "column_mapping_mode = '" + mode + "'" + - ")"), - partitioned); - } - - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, - PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingWithTrueAndFalseDataProvider") - @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) - public void testDeltaColumnMappingModeAllDataTypes(String mode, boolean partitioned) - { - testColumnMappingModeAllDataTypes(tableName -> onDelta().executeQuery("" + - "CREATE TABLE default." + tableName + " (" + - " a_boolean BOOLEAN," + - " a_tinyint TINYINT," + - " a_smallint SMALLINT," + - " a_int INT," + - " a_bigint BIGINT," + - " a_decimal_5_2 DECIMAL(5,2)," + - " a_decimal_21_3 DECIMAL(21,3)," + - " a_double DOUBLE," + - " a_float FLOAT," + - " a_string STRING," + - " a_date DATE," + - " a_timestamp TIMESTAMP," + - " a_binary BINARY," + - " a_string_array ARRAY," + - " a_struct_array ARRAY>," + - " a_map MAP," + - " a_complex_map MAP>," + - " a_struct STRUCT," + - " a_complex_struct STRUCT, a_int: INT>" + - (partitioned ? ", part STRING" : "") + - ")" + - " USING delta " + - (partitioned ? " PARTITIONED BY (part)" : "") + - " LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'" + - " TBLPROPERTIES (" + - " 'delta.columnMapping.mode'='" + mode + "')"), - partitioned); - } - - private void testColumnMappingModeAllDataTypes(Consumer createTable, boolean partitioned) - { - String tableName = "test_dl_column_mapping_mode_name_all_types_" + randomNameSuffix(); - - createTable.accept(tableName); - - try { - onTrino().executeQuery("" + - "INSERT INTO delta.default." 
+ tableName + - " VALUES " + - "(" + - " true, " + - " 1, " + - " 10," + - " 100, " + - " 1000, " + - " CAST('123.12' AS DECIMAL(5,2)), " + - " CAST('123456789012345678.123' AS DECIMAL(21,3)), " + - " DOUBLE '0', " + - " REAL '0', " + - " 'a', " + - " DATE '2020-08-21', " + - " TIMESTAMP '2020-10-21 01:00:00.123 UTC', " + - " X'abcd', " + - " ARRAY['element 1'], " + - " ARRAY[ROW('nested 1')], " + - " MAP(ARRAY['key'], ARRAY['value1']), " + - " MAP(ARRAY['key'], ARRAY[ROW('nested value1')]), " + - " ROW('item 1', 1), " + - " ROW(ROW('nested item 1'), 11) " + - (partitioned ? ", 'part1'" : "") + - "), " + - "(" + - " true, " + - " 2, " + - " 20," + - " 200, " + - " 2000, " + - " CAST('223.12' AS DECIMAL(5,2)), " + - " CAST('223456789012345678.123' AS DECIMAL(21,3)), " + - " DOUBLE '0', " + - " REAL '0', " + - " 'b', " + - " DATE '2020-08-22', " + - " TIMESTAMP '2020-10-22 02:00:00.456 UTC', " + - " X'abcd', " + - " ARRAY['element 2'], " + - " ARRAY[ROW('nested 2')], " + - " MAP(ARRAY['key'], ARRAY[null]), " + - " MAP(ARRAY['key'], ARRAY[null]), " + - " ROW('item 2', 2), " + - " ROW(ROW('nested item 2'), 22) " + - (partitioned ? 
", 'part2'" : "") + - ")"); - - Row firstRow = row(true, 1, 10, 100, 1000L, new BigDecimal("123.12"), new BigDecimal("123456789012345678.123"), 0d, 0f, "a", java.sql.Date.valueOf(LocalDate.of(2020, 8, 21)), new byte[] {(byte) 0xAB, (byte) 0xCD}, "element 1", "nested 1", "value1", "nested value1", "item 1", 1, "nested item 1", 11); - Row secondRow = row(true, 2, 20, 200, 2000L, new BigDecimal("223.12"), new BigDecimal("223456789012345678.123"), 0d, 0f, "b", java.sql.Date.valueOf(LocalDate.of(2020, 8, 22)), new byte[] {(byte) 0xAB, (byte) 0xCD}, "element 2", "nested 2", null, null, "item 2", 2, "nested item 2", 22); - List expectedRows = ImmutableList.of(firstRow, secondRow); - - String selectDeltaValues = "SELECT " + - "a_boolean, a_tinyint, a_smallint, a_int, a_bigint, a_decimal_5_2, a_decimal_21_3, a_double , a_float, a_string, a_date, a_binary, a_string_array[0], a_struct_array[0].a_string, a_map['key'], a_complex_map['key'].a_string, a_struct.a_string, a_struct.a_int, a_complex_struct.nested_struct.a_string, a_complex_struct.a_int " + - "FROM default." + tableName; - String selectTrinoValues = "SELECT " + - "a_boolean, a_tinyint, a_smallint, a_int, a_bigint, a_decimal_5_2, a_decimal_21_3, a_double , a_float, a_string, a_date, a_binary, a_string_array[1], a_struct_array[1].a_string, a_map['key'], a_complex_map['key'].a_string, a_struct.a_string, a_struct.a_int, a_complex_struct.nested_struct.a_string, a_complex_struct.a_int " + - "FROM delta.default." + tableName; - assertThat(onDelta().executeQuery(selectDeltaValues)).containsOnly(expectedRows); - assertThat(onTrino().executeQuery(selectTrinoValues)).containsOnly(expectedRows); - QueryResult selectDatabricksTimestamps = onDelta().executeQuery("SELECT date_format(a_timestamp, \"yyyy-MM-dd HH:mm:ss.SSS\") FROM default." 
+ tableName); - QueryResult selectTrinoTimestamps = onTrino().executeQuery("SELECT format('%1$tF %1$tT.%1$tL', a_timestamp) FROM delta.default.\"" + tableName + "\""); - assertThat(selectDatabricksTimestamps).containsOnly(selectTrinoTimestamps.rows().stream() - .map(QueryAssert.Row::new) - .collect(toImmutableList())); - - onTrino().executeQuery("UPDATE delta.default." + tableName + " SET a_boolean = false where a_tinyint = 1"); - Row updatedFirstRow = row(false, 1, 10, 100, 1000L, new BigDecimal("123.12"), new BigDecimal("123456789012345678.123"), 0d, 0f, "a", java.sql.Date.valueOf(LocalDate.of(2020, 8, 21)), new byte[] {(byte) 0xAB, (byte) 0xCD}, "element 1", "nested 1", "value1", "nested value1", "item 1", 1, "nested item 1", 11); - expectedRows = ImmutableList.of(updatedFirstRow, secondRow); - assertThat(onDelta().executeQuery(selectDeltaValues)).containsOnly(expectedRows); - assertThat(onTrino().executeQuery(selectTrinoValues)).containsOnly(expectedRows); - - onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a_tinyint = 2"); - expectedRows = ImmutableList.of(updatedFirstRow); - assertThat(onDelta().executeQuery(selectDeltaValues)) - .containsOnly(expectedRows); - assertThat(onTrino().executeQuery(selectTrinoValues)) - .containsOnly(expectedRows); - } - finally { - dropDeltaTableWithRetry("default." 
+ tableName); - } - } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingDataProvider") @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) public void testColumnMappingModeNameWithNonLowerCaseColumn(String mode) @@ -1098,59 +928,6 @@ public Object[][] changeColumnMappingDataProvider() }; } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingWithTrueAndFalseDataProvider") - @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) - public void testOptimizeProcedureColumnMappingMode(String mode, boolean partitioned) - { - String tableName = "test_dl_optimize_column_mapping_mode_" + randomNameSuffix(); - - onDelta().executeQuery("" + - "CREATE TABLE default." + tableName + - "(a_number INT, a_struct STRUCT, a_string STRING) " + - "USING delta " + - (partitioned ? "PARTITIONED BY (a_string)" : "") + - "LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'" + - "TBLPROPERTIES ('delta.columnMapping.mode'='" + mode + "')"); - - try { - onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES (1, row(11), 'a')"); - onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES (2, row(22), 'b')"); - onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES (3, row(33), 'c')"); - - Double stringColumnSize = partitioned ? null : 3.0; - List expectedStats = ImmutableList.builder() - .add(row("a_number", null, 3.0, 0.0, null, "1", "3")) - .add(row("a_struct", null, null, null, null, null, null)) - .add(row("a_string", stringColumnSize, 3.0, 0.0, null, null, null)) - .add(row(null, null, null, null, 3.0, null, null)) - .build(); - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." 
+ tableName)) - .containsOnly(expectedStats); - - // Execute OPTIMIZE procedure and verify that the statistics is preserved and the table is still writable and readable - onTrino().executeQuery("ALTER TABLE delta.default." + tableName + " EXECUTE OPTIMIZE"); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName)) - .containsOnly(expectedStats); - - onTrino().executeQuery("INSERT INTO delta.default." + tableName + " VALUES (4, row(44), 'd')"); - onDelta().executeQuery("INSERT INTO default." + tableName + " VALUES (5, named_struct('x',55), 'e')"); - - List expectedRows = ImmutableList.builder() - .add(row(1, 11, "a")) - .add(row(2, 22, "b")) - .add(row(3, 33, "c")) - .add(row(4, 44, "d")) - .add(row(5, 55, "e")) - .build(); - assertThat(onTrino().executeQuery("SELECT a_number, a_struct.x, a_string FROM delta.default." + tableName)).contains(expectedRows); - assertThat(onDelta().executeQuery("SELECT a_number, a_struct.x, a_string FROM default." + tableName)).contains(expectedRows); - } - finally { - dropDeltaTableWithRetry("default." 
+ tableName); - } - } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingDataProvider") @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) public void testTrinoSpecialCharacterColumnNamesWithColumnMappingMode(String mode) @@ -1201,160 +978,6 @@ private void testSpecialCharacterColumnNamesWithColumnMappingMode(Consumer>, nested STRUCT)" + - " USING delta " + - " LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'" + - " TBLPROPERTIES (" + - " 'delta.checkpointInterval' = 1, " + - " 'delta.checkpoint.writeStatsAsJson' = " + statsAsJsonEnabled + ", " + - " 'delta.checkpoint.writeStatsAsStruct' = " + !statsAsJsonEnabled + ", " + - " 'delta.columnMapping.mode' = '" + mode + "'" + - ")"); - - try { - String trinoColumns = "a_number, a_string, array_col[1].array_struct_element, nested.field1"; - String deltaColumns = "a_number, a_string, array_col[0].array_struct_element, nested.field1"; - - onTrino().executeQuery("INSERT INTO delta.default." + tableName + - " VALUES (1, 'first value', ARRAY[ROW('nested 1')], ROW('databricks 1'))," + - " (2, 'two', ARRAY[ROW('nested 2')], ROW('databricks 2'))," + - " (3, 'third value', ARRAY[ROW('nested 3')], ROW('databricks 3'))," + - " (4, 'four', ARRAY[ROW('nested 4')], ROW('databricks 4'))"); - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(1, "first value", "nested 1", "databricks 1"), - row(2, "two", "nested 2", "databricks 2"), - row(3, "third value", "nested 3", "databricks 3"), - row(4, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." 
+ tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 4.0, 0.0, null, "1", "4"), - row("a_string", 29.0, 4.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 4.0, null, null))); - - onTrino().executeQuery("UPDATE delta.default." + tableName + " SET a_number = a_number + 10 WHERE a_number in (3, 4)"); - onDelta().executeQuery("UPDATE default." + tableName + " SET a_number = a_number + 20 WHERE a_number in (1, 2)"); - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(21, "first value", "nested 1", "databricks 1"), - row(22, "two", "nested 2", "databricks 2"), - row(13, "third value", "nested 3", "databricks 3"), - row(14, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 4.0, 0.0, null, "13", "22"), - row("a_string", 29.0, 4.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 4.0, null, null))); - - onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a_number = 22"); - onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a_number = 13"); - onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a_number = 21"); - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(14, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." 
+ tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 1.0, 0.0, null, "14", "14"), - row("a_string", 29.0, 1.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 1.0, null, null))); - } - finally { - dropDeltaTableWithRetry("default." + tableName); - } - } - - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingWithTrueAndFalseDataProvider") - @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) - public void testSupportedPartitionedColumnMappingWrites(String mode, boolean statsAsJsonEnabled) - { - String tableName = "test_dl_dml_column_mapping_mode_" + mode + randomNameSuffix(); - - onDelta().executeQuery("" + - "CREATE TABLE default." + tableName + - " (a_number INT, a_string STRING, array_col ARRAY>, nested STRUCT)" + - " USING delta " + - " PARTITIONED BY (a_string)" + - " LOCATION 's3://" + bucketName + "/databricks-compatibility-test-" + tableName + "'" + - " TBLPROPERTIES (" + - " 'delta.checkpointInterval' = 1, " + - " 'delta.checkpoint.writeStatsAsJson' = " + statsAsJsonEnabled + ", " + - " 'delta.checkpoint.writeStatsAsStruct' = " + !statsAsJsonEnabled + ", " + - " 'delta.columnMapping.mode' = '" + mode + "'" + - ")"); - - try { - String trinoColumns = "a_number, a_string, array_col[1].array_struct_element, nested.field1"; - String deltaColumns = "a_number, a_string, array_col[0].array_struct_element, nested.field1"; - - onTrino().executeQuery("INSERT INTO delta.default." 
+ tableName + - " VALUES (1, 'first value', ARRAY[ROW('nested 1')], ROW('databricks 1'))," + - " (2, 'two', ARRAY[ROW('nested 2')], ROW('databricks 2'))," + - " (3, 'third value', ARRAY[ROW('nested 3')], ROW('databricks 3'))," + - " (4, 'four', ARRAY[ROW('nested 4')], ROW('databricks 4'))"); - - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(1, "first value", "nested 1", "databricks 1"), - row(2, "two", "nested 2", "databricks 2"), - row(3, "third value", "nested 3", "databricks 3"), - row(4, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 4.0, 0.0, null, "1", "4"), - row("a_string", null, 4.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 4.0, null, null))); - - onTrino().executeQuery("UPDATE delta.default." + tableName + " SET a_number = a_number + 10 WHERE a_number in (3, 4)"); - onDelta().executeQuery("UPDATE default." + tableName + " SET a_number = a_number + 20 WHERE a_number in (1, 2)"); - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(21, "first value", "nested 1", "databricks 1"), - row(22, "two", "nested 2", "databricks 2"), - row(13, "third value", "nested 3", "databricks 3"), - row(14, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 4.0, 0.0, null, "13", "22"), - row("a_string", null, 4.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 4.0, null, null))); - - onTrino().executeQuery("DELETE FROM delta.default." 
+ tableName + " WHERE a_number = 22"); - onTrino().executeQuery("DELETE FROM delta.default." + tableName + " WHERE a_number = 13"); - onDelta().executeQuery("DELETE FROM default." + tableName + " WHERE a_number = 21"); - assertDeltaTrinoTableEquals(tableName, trinoColumns, deltaColumns, ImmutableList.of( - row(14, "four", "nested 4", "databricks 4"))); - - assertThat(onTrino().executeQuery("SHOW STATS FOR delta.default." + tableName)) - .containsOnly(ImmutableList.of( - row("a_number", null, 1.0, 0.0, null, "14", "14"), - row("a_string", null, 1.0, 0.0, null, null, null), - row("array_col", null, null, null, null, null, null), - row("nested", null, null, null, null, null, null), - row(null, null, null, null, 1.0, null, null))); - } - finally { - dropDeltaTableWithRetry("default." + tableName); - } - } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, PROFILE_SPECIFIC_TESTS}, dataProvider = "supportedColumnMappingForDmlDataProvider") @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) public void testMergeUpdateWithColumnMapping(String mode) @@ -1473,12 +1096,6 @@ private void assertDeltaTrinoTableEquals(String tableName, String trinoQuery, St .containsOnly(expectedRows); } - @DataProvider - public Object[][] columnMappingWithTrueAndFalseDataProvider() - { - return cartesianProduct(supportedColumnMappingForDmlDataProvider(), trueFalse()); - } - @Test(groups = {DELTA_LAKE_DATABRICKS, DELTA_LAKE_OSS, DELTA_LAKE_EXCLUDE_91, DELTA_LAKE_EXCLUDE_104, PROFILE_SPECIFIC_TESTS}, dataProvider = "columnMappingDataProvider") @Flaky(issue = DATABRICKS_COMMUNICATION_FAILURE_ISSUE, match = DATABRICKS_COMMUNICATION_FAILURE_MATCH) public void testTrinoDropColumnWithColumnMappingMode(String mode)