Skip to content

Commit 4784edd

Browse files
committed
[SPARK-33593][SQL] Parquet vector reader incorrect with binary partition value
1 parent 0c19497 commit 4784edd

File tree

2 files changed

+17
-0
lines changed

2 files changed

+17
-0
lines changed

sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
5454
} else {
5555
if (t == DataTypes.BooleanType) {
5656
col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
57+
} else if (t == DataTypes.BinaryType) {
58+
col.putByteArray(0, row.getBinary(fieldIdx));
5759
} else if (t == DataTypes.ByteType) {
5860
col.putBytes(0, capacity, row.getByte(fieldIdx));
5961
} else if (t == DataTypes.ShortType) {

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3745,6 +3745,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
37453745
}
37463746
}
37473747
}
3748+
3749+
// Regression test for SPARK-33593: a BINARY partition column of a Parquet table must
// read back with the same content that was written.
test("SPARK-33593: Parquet vector reader incorrect with binary partition value") {
  // Run once with the vectorized Parquet reader enabled and once with it disabled, so
  // both read paths are checked against the same expected row.
  Seq(true, false).foreach { vectorized =>
    withSQLConf("spark.sql.parquet.enableVectorizedReader" -> vectorized.toString) {
      withTable("t1") {
        sql(
          """CREATE TABLE t1(name STRING, id BINARY, part BINARY)
            | USING PARQUET PARTITIONED BY (part)""".stripMargin)
        // X'537061726B2053514C' is the hex form of the ASCII bytes of "Spark SQL", so the
        // data column `id` and the partition column `part` hold the same bytes.
        sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
        // Cast both binary columns to string so the comparison is on readable text.
        checkAnswer(
          sql("SELECT name, cast(id as string), cast(part as string) FROM t1"),
          Row("a", "Spark SQL", "Spark SQL"))
      }
    }
  }
}
37483763
}
37493764

37503765
/**
 * Simple case class with a single optional string field.
 *
 * NOTE(review): presumably a fixture used by tests in this suite; its usages are not
 * visible here, so confirm before relying on that.
 *
 * @param bar optional string value
 */
case class Foo(bar: Option[String])

0 commit comments

Comments
 (0)