Skip to content

Commit 21b8e36

Browse files
committed
Add support for reading UniForm with Iceberg in Delta Lake
1 parent f198b32 commit 21b8e36

20 files changed

+505
-1
lines changed

plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/DeltaLakeSchemaSupport.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ private DeltaLakeSchemaSupport() {}
100100
private static final String CHECK_CONSTRAINTS_FEATURE_NAME = "checkConstraints";
101101
private static final String COLUMN_MAPPING_FEATURE_NAME = "columnMapping";
102102
private static final String DELETION_VECTORS_FEATURE_NAME = "deletionVectors";
103+
private static final String ICEBERG_COMPATIBILITY_V1_FEATURE_NAME = "icebergCompatV1";
104+
private static final String ICEBERG_COMPATIBILITY_V2_FEATURE_NAME = "icebergCompatV2";
103105
private static final String IDENTITY_COLUMNS_FEATURE_NAME = "identityColumns";
104106
private static final String INVARIANTS_FEATURE_NAME = "invariants";
105107
public static final String TIMESTAMP_NTZ_FEATURE_NAME = "timestampNtz";
@@ -184,6 +186,12 @@ public static boolean isDeletionVectorEnabled(MetadataEntry metadataEntry, Proto
184186
public static ColumnMappingMode getColumnMappingMode(MetadataEntry metadata, ProtocolEntry protocolEntry)
185187
{
186188
if (protocolEntry.supportsReaderFeatures() || protocolEntry.supportsWriterFeatures()) {
189+
if (protocolEntry.writerFeaturesContains(ICEBERG_COMPATIBILITY_V1_FEATURE_NAME) || protocolEntry.writerFeaturesContains(ICEBERG_COMPATIBILITY_V2_FEATURE_NAME)) {
190+
String columnMappingMode = metadata.getConfiguration().get(COLUMN_MAPPING_MODE_CONFIGURATION_KEY);
191+
checkArgument(columnMappingMode != null && columnMappingMode.equals("name"), "Column mapping mode must be 'name' for Iceberg compatibility: %s", columnMappingMode);
192+
return ColumnMappingMode.NAME;
193+
}
194+
187195
boolean supportsColumnMappingReader = protocolEntry.readerFeaturesContains(COLUMN_MAPPING_FEATURE_NAME);
188196
boolean supportsColumnMappingWriter = protocolEntry.writerFeaturesContains(COLUMN_MAPPING_FEATURE_NAME);
189197
checkArgument(

plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ public class TestDeltaLakeBasic
9999
new ResourceTable("deletion_vectors", "databricks122/deletion_vectors"),
100100
new ResourceTable("liquid_clustering", "deltalake/liquid_clustering"),
101101
new ResourceTable("timestamp_ntz", "databricks131/timestamp_ntz"),
102-
new ResourceTable("timestamp_ntz_partition", "databricks131/timestamp_ntz_partition"));
102+
new ResourceTable("timestamp_ntz_partition", "databricks131/timestamp_ntz_partition"),
103+
new ResourceTable("uniform_iceberg_v1", "databricks133/uniform_iceberg_v1"),
104+
new ResourceTable("uniform_iceberg_v2", "databricks143/uniform_iceberg_v2"));
103105

104106
// The col-{uuid} pattern for delta.columnMapping.physicalName
105107
private static final Pattern PHYSICAL_COLUMN_NAME_PATTERN = Pattern.compile("^col-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$");
@@ -959,6 +961,26 @@ public void testLiquidClustering()
959961
assertQueryFails("INSERT INTO liquid_clustering VALUES ('test 3', 2024, 3)", "Unsupported writer features: .*");
960962
}
961963

964+
/**
965+
* @see databricks133.uniform_iceberg_v1
966+
*/
967+
@Test
968+
public void testUniFormIcebergV1()
969+
{
970+
assertQuery("SELECT * FROM uniform_iceberg_v1", "VALUES (1, 'test data')");
971+
assertQueryFails("INSERT INTO uniform_iceberg_v1 VALUES (2, 'new data')", "\\QUnsupported writer features: [icebergCompatV1]");
972+
}
973+
974+
/**
975+
* @see databricks143.uniform_iceberg_v2
976+
*/
977+
@Test
978+
public void testUniFormIcebergV2()
979+
{
980+
assertQuery("SELECT * FROM uniform_iceberg_v2", "VALUES (1, 'test data')");
981+
assertQueryFails("INSERT INTO uniform_iceberg_v2 VALUES (2, 'new data')", "\\QUnsupported writer features: [icebergCompatV2]");
982+
}
983+
962984
@Test
963985
public void testCorruptedManagedTableLocation()
964986
throws Exception
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
Data generated using Databricks 13.3 with Unity Catalog.
2+
The `delta.universalFormat.enabledFormats` table property requires Unity Catalog.
3+
4+
```sql
5+
CREATE TABLE main.default.test_uniform_iceberg_v1
6+
(a integer, b string)
7+
USING DELTA
8+
TBLPROPERTIES (
9+
'delta.enableIcebergCompatV1' = 'true',
10+
'delta.universalFormat.enabledFormats' = 'iceberg'
11+
);
12+
13+
INSERT INTO main.default.test_uniform_iceberg_v1 VALUES (1, 'test data');
14+
```
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1717737493887,"userId":"7853186923043731","userName":"[email protected]","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","description":null,"isManaged":"true","properties":"{\"delta.enableIcebergCompatV1\":\"true\",\"delta.universalFormat.enabledFormats\":\"iceberg\"}","statsOnLoad":false},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0213-045432-cqrij0nb","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"a48af02b-55e8-411b-b278-876b33ea6270"}}
2+
{"metaData":{"id":"2543f7c2-17c5-421d-b270-773af5654cec","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-7cccf654-9a0b-463e-add7-da6899c1c97b\"}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":2,\"delta.columnMapping.physicalName\":\"col-f272ebe6-6618-4a5c-8dd6-0793d2204587\"}}]}","partitionColumns":[],"configuration":{"delta.enableIcebergCompatV1":"true","delta.universalFormat.enabledFormats":"iceberg","delta.columnMapping.mode":"name","delta.columnMapping.maxColumnId":"2"},"createdTime":1717737492488}}
3+
{"protocol":{"minReaderVersion":2,"minWriterVersion":7,"writerFeatures":["columnMapping","icebergCompatV1"]}}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1717737560500,"userId":"7853186923043731","userName":"[email protected]","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"1841155838656679"},"clusterId":"0213-045432-cqrij0nb","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1320"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"18b114d7-eaa8-4279-87e3-9136a3ba510d"}}
2+
{"add":{"path":"vL/part-00000-a3b8b550-b0f5-4585-beef-fc82aea5d233-c000.snappy.parquet","partitionValues":{},"size":1320,"modificationTime":1717737559000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"col-7cccf654-9a0b-463e-add7-da6899c1c97b\":1,\"col-f272ebe6-6618-4a5c-8dd6-0793d2204587\":\"test data\"},\"maxValues\":{\"col-7cccf654-9a0b-463e-add7-da6899c1c97b\":1,\"col-f272ebe6-6618-4a5c-8dd6-0793d2204587\":\"test data\"},\"nullCount\":{\"col-7cccf654-9a0b-463e-add7-da6899c1c97b\":0,\"col-f272ebe6-6618-4a5c-8dd6-0793d2204587\":0}}","tags":{"INSERTION_TIME":"1717737559000000","MIN_INSERTION_TIME":"1717737559000000","MAX_INSERTION_TIME":"1717737559000000","OPTIMIZE_TARGET_SIZE":"67108864"}}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"format-version" : 1,
3+
"table-uuid" : "0eb73468-144c-44fb-911c-8cee8bf441aa",
4+
"location" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1",
5+
"last-updated-ms" : 1717737524993,
6+
"last-column-id" : 2,
7+
"schema" : {
8+
"type" : "struct",
9+
"schema-id" : 0,
10+
"fields" : [ {
11+
"id" : 1,
12+
"name" : "a",
13+
"required" : false,
14+
"type" : "int"
15+
}, {
16+
"id" : 2,
17+
"name" : "b",
18+
"required" : false,
19+
"type" : "string"
20+
} ]
21+
},
22+
"current-schema-id" : 0,
23+
"schemas" : [ {
24+
"type" : "struct",
25+
"schema-id" : 0,
26+
"fields" : [ {
27+
"id" : 1,
28+
"name" : "a",
29+
"required" : false,
30+
"type" : "int"
31+
}, {
32+
"id" : 2,
33+
"name" : "b",
34+
"required" : false,
35+
"type" : "string"
36+
} ]
37+
} ],
38+
"partition-spec" : [ ],
39+
"default-spec-id" : 0,
40+
"partition-specs" : [ {
41+
"spec-id" : 0,
42+
"fields" : [ ]
43+
} ],
44+
"last-partition-id" : 999,
45+
"default-sort-order-id" : 0,
46+
"sort-orders" : [ {
47+
"order-id" : 0,
48+
"fields" : [ ]
49+
} ],
50+
"properties" : {
51+
"schema.name-mapping.default" : "[ {\n \"field-id\" : 1,\n \"names\" : [ \"a\" ]\n}, {\n \"field-id\" : 2,\n \"names\" : [ \"b\" ]\n} ]",
52+
"delta-timestamp" : "1717737495000",
53+
"delta-version" : "-1"
54+
},
55+
"current-snapshot-id" : -1,
56+
"refs" : { },
57+
"snapshots" : [ ],
58+
"statistics" : [ ],
59+
"snapshot-log" : [ ],
60+
"metadata-log" : [ ]
61+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
{
2+
"format-version" : 1,
3+
"table-uuid" : "0eb73468-144c-44fb-911c-8cee8bf441aa",
4+
"location" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1",
5+
"last-updated-ms" : 1717737525809,
6+
"last-column-id" : 2,
7+
"schema" : {
8+
"type" : "struct",
9+
"schema-id" : 0,
10+
"fields" : [ {
11+
"id" : 1,
12+
"name" : "a",
13+
"required" : false,
14+
"type" : "int"
15+
}, {
16+
"id" : 2,
17+
"name" : "b",
18+
"required" : false,
19+
"type" : "string"
20+
} ]
21+
},
22+
"current-schema-id" : 0,
23+
"schemas" : [ {
24+
"type" : "struct",
25+
"schema-id" : 0,
26+
"fields" : [ {
27+
"id" : 1,
28+
"name" : "a",
29+
"required" : false,
30+
"type" : "int"
31+
}, {
32+
"id" : 2,
33+
"name" : "b",
34+
"required" : false,
35+
"type" : "string"
36+
} ]
37+
} ],
38+
"partition-spec" : [ ],
39+
"default-spec-id" : 0,
40+
"partition-specs" : [ {
41+
"spec-id" : 0,
42+
"fields" : [ ]
43+
} ],
44+
"last-partition-id" : 999,
45+
"default-sort-order-id" : 0,
46+
"sort-orders" : [ {
47+
"order-id" : 0,
48+
"fields" : [ ]
49+
} ],
50+
"properties" : {
51+
"schema.name-mapping.default" : "[ {\n \"field-id\" : 1,\n \"names\" : [ \"a\" ]\n}, {\n \"field-id\" : 2,\n \"names\" : [ \"b\" ]\n} ]",
52+
"delta-timestamp" : "1717737495000",
53+
"delta-version" : "0"
54+
},
55+
"current-snapshot-id" : -1,
56+
"refs" : { },
57+
"snapshots" : [ ],
58+
"statistics" : [ ],
59+
"snapshot-log" : [ ],
60+
"metadata-log" : [ {
61+
"timestamp-ms" : 1717737524993,
62+
"metadata-file" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1/metadata/00000-a6f70674-5338-4b57-b07e-cd4b3253fa44.metadata.json"
63+
} ]
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"format-version" : 1,
3+
"table-uuid" : "0eb73468-144c-44fb-911c-8cee8bf441aa",
4+
"location" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1",
5+
"last-updated-ms" : 1717737567454,
6+
"last-column-id" : 2,
7+
"schema" : {
8+
"type" : "struct",
9+
"schema-id" : 0,
10+
"fields" : [ {
11+
"id" : 1,
12+
"name" : "a",
13+
"required" : false,
14+
"type" : "int"
15+
}, {
16+
"id" : 2,
17+
"name" : "b",
18+
"required" : false,
19+
"type" : "string"
20+
} ]
21+
},
22+
"current-schema-id" : 0,
23+
"schemas" : [ {
24+
"type" : "struct",
25+
"schema-id" : 0,
26+
"fields" : [ {
27+
"id" : 1,
28+
"name" : "a",
29+
"required" : false,
30+
"type" : "int"
31+
}, {
32+
"id" : 2,
33+
"name" : "b",
34+
"required" : false,
35+
"type" : "string"
36+
} ]
37+
} ],
38+
"partition-spec" : [ ],
39+
"default-spec-id" : 0,
40+
"partition-specs" : [ {
41+
"spec-id" : 0,
42+
"fields" : [ ]
43+
} ],
44+
"last-partition-id" : 999,
45+
"default-sort-order-id" : 0,
46+
"sort-orders" : [ {
47+
"order-id" : 0,
48+
"fields" : [ ]
49+
} ],
50+
"properties" : {
51+
"schema.name-mapping.default" : "[ {\n \"field-id\" : 1,\n \"names\" : [ \"a\" ]\n}, {\n \"field-id\" : 2,\n \"names\" : [ \"b\" ]\n} ]",
52+
"delta-timestamp" : "1717737561000",
53+
"delta-version" : "1"
54+
},
55+
"current-snapshot-id" : 3448407241734536873,
56+
"refs" : {
57+
"main" : {
58+
"snapshot-id" : 3448407241734536873,
59+
"type" : "branch"
60+
}
61+
},
62+
"snapshots" : [ {
63+
"snapshot-id" : 3448407241734536873,
64+
"timestamp-ms" : 1717737567285,
65+
"summary" : {
66+
"operation" : "append",
67+
"added-data-files" : "1",
68+
"added-records" : "1",
69+
"added-files-size" : "1320",
70+
"changed-partition-count" : "1",
71+
"total-records" : "1",
72+
"total-files-size" : "1320",
73+
"total-data-files" : "1",
74+
"total-delete-files" : "0",
75+
"total-position-deletes" : "0",
76+
"total-equality-deletes" : "0"
77+
},
78+
"manifest-list" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1/metadata/snap-3448407241734536873-1-2dc6572c-92dd-4786-80d0-324b424124b3.avro",
79+
"schema-id" : 0
80+
} ],
81+
"statistics" : [ ],
82+
"snapshot-log" : [ {
83+
"timestamp-ms" : 1717737567285,
84+
"snapshot-id" : 3448407241734536873
85+
} ],
86+
"metadata-log" : [ {
87+
"timestamp-ms" : 1717737524993,
88+
"metadata-file" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1/metadata/00000-a6f70674-5338-4b57-b07e-cd4b3253fa44.metadata.json"
89+
}, {
90+
"timestamp-ms" : 1717737525809,
91+
"metadata-file" : "s3://trino-ci-test/databricks-unity/af968baf-af29-4111-8bd7-4a67d3a3a39f/tables/867ed8f6-384c-42b7-a589-76551ed2dea1/metadata/00001-15a4900d-0b87-4ca8-93cf-ebde86de39aa.metadata.json"
92+
} ]
93+
}

0 commit comments

Comments
 (0)