diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java index 90c2b8b57a75..00b9b937d98b 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java @@ -519,21 +519,16 @@ private ColumnChunk readVariant(VariantField field) int positionCount = metadataChunk.getBlock().getPositionCount(); BlockBuilder variantBlock = VARCHAR.createBlockBuilder(null, max(1, positionCount)); - if (positionCount == 0) { - variantBlock.appendNull(); - } - else { - ColumnChunk valueChunk = readColumnChunk(field.getValue()); - for (int position = 0; position < positionCount; position++) { - Slice metadata = VARBINARY.getSlice(metadataChunk.getBlock(), position); - if (metadata.length() == 0) { - variantBlock.appendNull(); - continue; - } - Slice value = VARBINARY.getSlice(valueChunk.getBlock(), position); - Variant variant = new Variant(value.getBytes(), metadata.getBytes()); - VARCHAR.writeSlice(variantBlock, utf8Slice(variant.toJson(zoneId))); + ColumnChunk valueChunk = readColumnChunk(field.getValue()); + for (int position = 0; position < positionCount; position++) { + Slice metadata = VARBINARY.getSlice(metadataChunk.getBlock(), position); + if (metadata.length() == 0) { + variantBlock.appendNull(); + continue; } + Slice value = VARBINARY.getSlice(valueChunk.getBlock(), position); + Variant variant = new Variant(value.getBytes(), metadata.getBytes()); + VARCHAR.writeSlice(variantBlock, utf8Slice(variant.toJson(zoneId))); } return new ColumnChunk(variantBlock.build(), metadataChunk.getDefinitionLevels(), metadataChunk.getRepetitionLevels()); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 27db0930313b..6756c069b330 100644 --- 
a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1613,6 +1613,9 @@ public void testVariant() assertQueryFails("INSERT INTO variant VALUES (2, null, null, null, null, 'new data')", "Unsupported writer features: .*"); } + /** + * @see databricks154.test_variant_null + */ @Test public void testVariantReadNull() throws Exception @@ -1626,11 +1629,22 @@ public void testVariantReadNull() .matches("VALUES 3"); assertThat(query("SELECT * FROM " + tableName + " WHERE id = 3")) - .matches("VALUES (3, JSON 'null')"); + .skippingTypesCheck() + .matches("VALUES (3, JSON 'null', NULL)"); assertThat(query("SELECT * FROM " + tableName + " WHERE id = 4")) - .matches("VALUES (4, CAST(NULL AS JSON))"); + .skippingTypesCheck() + .matches("VALUES (4, NULL, NULL)"); assertThat(query("SELECT id FROM " + tableName + " WHERE x IS NULL")) .matches("VALUES 4"); + + assertThat(query("TABLE " + tableName)) + .skippingTypesCheck() + .matches("VALUES " + + "(1, JSON '{\"a\":1}', MAP(ARRAY['key1'], ARRAY[NULL]))," + + "(2, JSON '{\"a\":2}', MAP(ARRAY['key1'], ARRAY[JSON '{\"key\":\"value\"}']))," + + "(3, JSON 'null', NULL)," + + "(4, NULL, NULL)," + + "(5, JSON '{\"a\":5}', NULL)"); } /** diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md index 80289dacb918..8bd750834a36 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md +++ b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md @@ -3,14 +3,15 @@ Data generated using Databricks 15.4: ```sql CREATE TABLE test_variant_null ( id INT, -x VARIANT +x VARIANT, +y MAP<STRING, VARIANT> ) USING DELTA LOCATION ?; -INSERT INTO test_variant_null values -(1, parse_json('{"a":1}')), -(2, parse_json('{"a":2}')), -(3, 
parse_json('null')), -(4, NULL), -(5, parse_json('{"a":5}')); +INSERT INTO test_variant_null values +(1, parse_json('{"a":1}'), map('key1', NULL)), +(2, parse_json('{"a":2}'), map('key1', parse_json('{"key":"value"}'))), +(3, parse_json('null'), NULL), +(4, NULL, NULL), +(5, parse_json('{"a":5}'), NULL); ``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json index 407e8afbab20..a7209e6edc30 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json +++ b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json @@ -1,3 +1,3 @@ -{"commitInfo":{"timestamp":1752418845893,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"7a8ee623-3291-4bb7-9b54-e1d7fa6f7bbb"}} -{"metaData":{"id":"f363d3d3-6ad6-4228-a48a-53d4e60f58a3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1752418845102}} +{"commitInfo":{"timestamp":1758692471407,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"CREATE 
TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"756c0581-29cf-436f-a2fe-8251bf671f32"}} +{"metaData":{"id":"94ac345a-e904-4882-a9d9-bea4fcc8dd06","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}},{\"name\":\"y\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"variant\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1758692471256}} {"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","variantType-preview"],"writerFeatures":["deletionVectors","variantType-preview"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json index cacfdcdc320a..ac5a605e545a 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json +++ b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json @@ -1,2 +1,2 @@ 
-{"commitInfo":{"timestamp":1752418883651,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1094"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"57b16f5c-8c44-4349-b47e-7a70373f38e5"}} -{"add":{"path":"part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet","partitionValues":{},"size":1094,"modificationTime":1752418883000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0,\"x\":1},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1752418883000000","MIN_INSERTION_TIME":"1752418883000000","MAX_INSERTION_TIME":"1752418883000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"commitInfo":{"timestamp":1758692482446,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1945"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"ce6e305b-2e60-4371-9f9d-6356857ad279"}} 
+{"add":{"path":"part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet","partitionValues":{},"size":1945,"modificationTime":1758692483000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0,\"x\":1,\"y\":3},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1758692483000000","MIN_INSERTION_TIME":"1758692483000000","MAX_INSERTION_TIME":"1758692483000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet deleted file mode 100644 index 979e991a0c38..000000000000 Binary files a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet and /dev/null differ diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet new file mode 100644 index 000000000000..d2545ac37db1 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet differ