From 516c9d70a724227cf62d04daf2f40a081a6746cb Mon Sep 17 00:00:00 2001 From: chenjian2664 Date: Wed, 24 Sep 2025 14:20:28 +0800 Subject: [PATCH 1/2] Fix reading `null` map with `VARIANT` as value type Previously, a `null` value was appended when the `VARIANT` block had zero positions. This caused a mismatch between key and value block sizes, leading to errors when building map types. --- .../trino/parquet/reader/ParquetReader.java | 23 +++++++----------- .../plugin/deltalake/TestDeltaLakeBasic.java | 18 ++++++++++++-- .../databricks154/test_variant_null/README.md | 15 ++++++------ .../_delta_log/00000000000000000000.json | 4 +-- .../_delta_log/00000000000000000001.json | 4 +-- ...4177-bd36-2c936db81e90-c000.snappy.parquet | Bin 1094 -> 0 bytes ...4716-b8ee-9a9879ee0289-c000.snappy.parquet | Bin 0 -> 1945 bytes 7 files changed, 37 insertions(+), 27 deletions(-) delete mode 100644 plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java index 90c2b8b57a75..00b9b937d98b 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java @@ -519,21 +519,16 @@ private ColumnChunk readVariant(VariantField field) int positionCount = metadataChunk.getBlock().getPositionCount(); BlockBuilder variantBlock = VARCHAR.createBlockBuilder(null, max(1, positionCount)); - if (positionCount == 0) { - variantBlock.appendNull(); - } - else { - ColumnChunk valueChunk = readColumnChunk(field.getValue()); - for (int position = 0; position < positionCount; position++) { - Slice 
metadata = VARBINARY.getSlice(metadataChunk.getBlock(), position); - if (metadata.length() == 0) { - variantBlock.appendNull(); - continue; - } - Slice value = VARBINARY.getSlice(valueChunk.getBlock(), position); - Variant variant = new Variant(value.getBytes(), metadata.getBytes()); - VARCHAR.writeSlice(variantBlock, utf8Slice(variant.toJson(zoneId))); + ColumnChunk valueChunk = readColumnChunk(field.getValue()); + for (int position = 0; position < positionCount; position++) { + Slice metadata = VARBINARY.getSlice(metadataChunk.getBlock(), position); + if (metadata.length() == 0) { + variantBlock.appendNull(); + continue; } + Slice value = VARBINARY.getSlice(valueChunk.getBlock(), position); + Variant variant = new Variant(value.getBytes(), metadata.getBytes()); + VARCHAR.writeSlice(variantBlock, utf8Slice(variant.toJson(zoneId))); } return new ColumnChunk(variantBlock.build(), metadataChunk.getDefinitionLevels(), metadataChunk.getRepetitionLevels()); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 27db0930313b..6756c069b330 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1613,6 +1613,9 @@ public void testVariant() assertQueryFails("INSERT INTO variant VALUES (2, null, null, null, null, 'new data')", "Unsupported writer features: .*"); } + /** + * @see databricks154.test_variant_null + */ @Test public void testVariantReadNull() throws Exception @@ -1626,11 +1629,22 @@ public void testVariantReadNull() .matches("VALUES 3"); assertThat(query("SELECT * FROM " + tableName + " WHERE id = 3")) - .matches("VALUES (3, JSON 'null')"); + .skippingTypesCheck() + .matches("VALUES (3, JSON 'null', NULL)"); assertThat(query("SELECT * FROM " + tableName + " WHERE id = 4")) - 
.matches("VALUES (4, CAST(NULL AS JSON))"); + .skippingTypesCheck() + .matches("VALUES (4, NULL, NULL)"); assertThat(query("SELECT id FROM " + tableName + " WHERE x IS NULL")) .matches("VALUES 4"); + + assertThat(query("TABLE " + tableName)) + .skippingTypesCheck() + .matches("VALUES " + + "(1, JSON '{\"a\":1}', MAP(ARRAY['key1'], ARRAY[NULL]))," + + "(2, JSON '{\"a\":2}', MAP(ARRAY['key1'], ARRAY[JSON '{\"key\":\"value\"}']))," + + "(3, JSON 'null', NULL)," + + "(4, NULL, NULL)," + + "(5, JSON '{\"a\":5}', NULL)"); } /** diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md index 80289dacb918..8bd750834a36 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md +++ b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/README.md @@ -3,14 +3,15 @@ Data generated using Databricks 15.4: ```sql CREATE TABLE test_variant_null ( id INT, -x VARIANT +x VARIANT, +y MAP<STRING, VARIANT> ) USING DELTA LOCATION ?; -INSERT INTO test_variant_null values -(1, parse_json('{"a":1}')), -(2, parse_json('{"a":2}')), -(3, parse_json('null')), -(4, NULL), -(5, parse_json('{"a":5}')); +INSERT INTO test_variant_null values +(1, parse_json('{"a":1}'), map('key1', NULL)), +(2, parse_json('{"a":2}'), map('key1', parse_json('{"key":"value"}'))), +(3, parse_json('null'), NULL), +(4, NULL, NULL), +(5, parse_json('{"a":5}'), NULL); ``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json index 407e8afbab20..a7209e6edc30 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json +++ 
b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000000.json @@ -1,3 +1,3 @@ -{"commitInfo":{"timestamp":1752418845893,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"7a8ee623-3291-4bb7-9b54-e1d7fa6f7bbb"}} -{"metaData":{"id":"f363d3d3-6ad6-4228-a48a-53d4e60f58a3","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1752418845102}} +{"commitInfo":{"timestamp":1758692471407,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"756c0581-29cf-436f-a2fe-8251bf671f32"}} 
+{"metaData":{"id":"94ac345a-e904-4882-a9d9-bea4fcc8dd06","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"x\",\"type\":\"variant\",\"nullable\":true,\"metadata\":{}},{\"name\":\"y\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"variant\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1758692471256}} {"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","variantType-preview"],"writerFeatures":["deletionVectors","variantType-preview"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json index cacfdcdc320a..ac5a605e545a 100644 --- a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json +++ b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/_delta_log/00000000000000000001.json @@ -1,2 +1,2 @@ -{"commitInfo":{"timestamp":1752418883651,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1094"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"57b16f5c-8c44-4349-b47e-7a70373f38e5"}} 
-{"add":{"path":"part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet","partitionValues":{},"size":1094,"modificationTime":1752418883000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0,\"x\":1},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1752418883000000","MIN_INSERTION_TIME":"1752418883000000","MAX_INSERTION_TIME":"1752418883000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"commitInfo":{"timestamp":1758692482446,"userId":"4222103729284476","userName":"jian.chen@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2325234880148729"},"clusterId":"1002-064054-nbosugsx","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1945"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/15.4.x-scala2.12","txnId":"ce6e305b-2e60-4371-9f9d-6356857ad279"}} +{"add":{"path":"part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet","partitionValues":{},"size":1945,"modificationTime":1758692483000,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0,\"x\":1,\"y\":3},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1758692483000000","MIN_INSERTION_TIME":"1758692483000000","MAX_INSERTION_TIME":"1758692483000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-3dae12c4-61bc-4177-bd36-2c936db81e90-c000.snappy.parquet deleted file mode 100644 index 979e991a0c38817309c6d2707ca326fe8f1ca6ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 
literal 1094 zcmaJ>%Wl&^6dg~J)`Tu7>I^orXk^+bl@iOZ*m0EwAS51QQL#ciRE?cUNt#z}rvz2q zAy(Z%g^*Yvmi>V)S%ZZ52ZUgQSi_EMJAFtXl*GPsa?iQv9xGY9dew|lWBE;$@Qd!riVA@q7e%> z*Q4Mu?=x^q@G_Qgr-kZR5+(^Sw~VC1HtUUfR*QWevlfdPqD5Jen`Kb}rTUBr(}74Z zEmEKQWwDt#&(@%*lJ1e6zz}2%!w{msa%6%2#6&~ou0rNArHU}gW~Oq_1v)8rn+~oV z?F}KE*4WE~#+zKCaez~i{ER4r$g$Z}=b1qF1n4M#5fl*1vXYYEkX%h378n9o$qmuB zc|z&OM3er2oysEph8LjmjwB7k4ULT$WTvI@Y1=%EI>Uifu(Z0SBWdNYj%cf=LDE}e z9&6$7iFOpCv@h(a@zW7kSJfyE$3d)C)pm#XT9LZ?U`HLWe)3hP1@N@3Gl=;H4*?jA zdp+j&paGb?sve1|t2?{9)noGHguBhc4jUvh{>y!|i=<1zu&*W0*$+FxW;ES2=#8V8 zhqqeNf}!hXO?Pa)Zr29>FdA<}6C^GElhz&jH^Sj~1iWj8TQ_Z^VY&^&GS?f9ZQ8o& z*^c2lmgSnRv2JnKvOJG5+c0d`Gd-v6`GMgCj>#OSVIb-LG|O5)EKS2N8Jb~gR_(-c zO$Udiia+l3S|!tN*vzZ9d~P@laEn{qH@(2rjkf2t+a71GYjD0IHx-1kH@XZz*fsbE F`2*^X;syW! diff --git a/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks154/test_variant_null/part-00000-a4542d9d-2170-4716-b8ee-9a9879ee0289-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d2545ac37db1ed8f4fcbc230b2f9910b96129499 GIT binary patch literal 1945 zcma)7Ply|36#r(DOs3hTwzA*nj0|lE-H<|dm?V=)Cgf26fVC{Ls0UGW`%N<0&2Ii| zXOcBx*^7t=@gS(EpqJvI>`A>;L|W;sVyPDo6%VC|-g;iegYWyk$)D9lCCTJ{Z{GX8 z-}}8c$=Vao7cjy$yl($;=VrpdDL5)vMdY(PEszML<Q?)S=OWxj0P_*BQ3$@6Sms*2ee5Cl!uIAe)4J_ z;L36B1hlshEXpznA zVXS^+1CJbtnw#IwBlqSt`+?OyRPZH~y&o6-9b5aTgs+jy5MH0|_tb#0lbxUX=F9!c z%H2xh8X{jNNGhw6uM$h3`>Lk(gAJ4JP}pmeueiDE2VuM4(-I}KY+6V=|Bs09O2>q! 
z;mXh-m_h$_bF%2#NAg}~Fy8SqwM;k&hK)fcpJ}!IPBYBZUfIj^XqVk;H^Ci+wR;19 z!w>p%cx`uWTRG*jE#-_K8zN3+~r8nkJT)%c&e<7w`cW|!`O6ISWPDeZP| z16=&+jGVJV-1}6&H=yla_#7}6Z1?|z!mIm8JJaZQP4;u(1?|Rm7>V8J48wsRJloWk z3YJw|u^ihf+bcb<9}YLd5z@{co3_>Wo(cNH9mspKP$^d(7p`u_sjNGeT`HC>+qRu* z)dj2Ktb2`W(W$s@gF4hM`DI!uRvHD{Dimp};@S?mB$}``a760Dlr| H@TdG2M-z{p literal 0 HcmV?d00001 From ee7c8456134be512ca03f05b3d5dcd6d8635b2ef Mon Sep 17 00:00:00 2001 From: chenjian2664 Date: Thu, 25 Sep 2025 17:03:55 +0800 Subject: [PATCH 2/2] Empty