Skip to content

Commit b06be75

Browse files
committed
Fix reading variant null values in Delta Lake
In cases where a value chunk includes both null and non-null entries, the chunk must be treated as potentially nullable.
1 parent 2a0ff84 commit b06be75

File tree

6 files changed

+8
-7
lines changed

6 files changed

+8
-7
lines changed

lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -299,14 +299,14 @@ private static Optional<Field> constructField(Type type, ColumnIO columnIO, bool
299299
if (!(columnIO instanceof GroupColumnIO groupColumnIo)) {
300300
throw new IllegalStateException("Expected columnIO to be GroupColumnIO but got %s".formatted(columnIO.getClass().getSimpleName()));
301301
}
302-
Field valueField = constructField(VARBINARY, groupColumnIo.getChild(0), false).orElseThrow();
302+
PrimitiveField valueField = (PrimitiveField) constructField(VARBINARY, groupColumnIo.getChild(0), false).orElseThrow();
303303
PrimitiveField metadataField = (PrimitiveField) constructField(VARBINARY, groupColumnIo.getChild(1), false).orElseThrow();
304304
return Optional.of(new VariantField(
305305
type,
306306
repetitionLevel,
307307
definitionLevel,
308308
required,
309-
valueField,
309+
new PrimitiveField(valueField.getType(), false, valueField.getDescriptor(), valueField.getId()),
310310
// Mark the metadata field as optional because it is not present when the actual Variant value is null
311311
new PrimitiveField(metadataField.getType(), false, metadataField.getDescriptor(), metadataField.getId())));
312312
}

plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,5 +11,6 @@ INSERT INTO test_variant_null values
1111
(1, parse_json('{"a":1}')),
1212
(2, parse_json('{"a":2}')),
1313
(3, parse_json('null')),
14-
(4, NULL);
14+
(4, NULL),
15+
(5, parse_json('{"a":5}'));
1516
```
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
1-
{"commitInfo":{"timestamp":1750987703896,"userId":"4222103729284476","userName":"[email protected]","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"c677ecaa-0b37-4998-a698-7fe58fdddc2e"}}
2-
{"metaData":{"id":"7113656c-e75c-486e-bcca-35fb29c6e4ce","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1750987703781}}
1+
{"commitInfo":{"timestamp":1752418845893,"userId":"4222103729284476","userName":"[email protected]","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"7a8ee623-3291-4bb7-9b54-e1d7fa6f7bbb"}}
2+
{"metaData":{"id":"f363d3d3-6ad6-4228-a48a-53d4e60f58a3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1752418845102}}
33
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","variantType-preview"],"writerFeatures":["deletionVectors","variantType-preview"]}}
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
{"commitInfo":{"timestamp":1750987827929,"userId":"4222103729284476","userName":"[email protected]","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"4","numOutputBytes":"1082"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"f08b2495-6911-4949-b784-9c1ac49f96d9"}}
2-
{"add":{"path":"part-00000-370c7cfb-47f7-4c81-81bd-3ecd7b5b4c41-c000.snappy.parquet","partitionValues":{},"size":1082,"modificationTime":1750987828000,"dataChange":true,"stats":"{\"numRecords\":4,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0,\"x\":1},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1750987828000000","MIN_INSERTION_TIME":"1750987828000000","MAX_INSERTION_TIME":"1750987828000000","OPTIMIZE_TARGET_SIZE":"268435456"}}}
1+
{"commitInfo":{"timestamp":1752418883651,"userId":"4222103729284476","userName":"[email protected]","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1094"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"57b16f5c-8c44-4349-b47e-7a70373f38e5"}}
2+
{"add":{"path":"part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet","partitionValues":{},"size":1094,"modificationTime":1752418883000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0,\"x\":1},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1752418883000000","MIN_INSERTION_TIME":"1752418883000000","MAX_INSERTION_TIME":"1752418883000000","OPTIMIZE_TARGET_SIZE":"268435456"}}}

0 commit comments

Comments
 (0)