Skip to content

Commit 79065ac

Browse files
committed
[SPARK-49275][SQL] Fix return type nullness of the xpath expression
What changes were proposed in this pull request? The `xpath` expression incorrectly marks its return type as an array of non-null strings. However, it can actually return an array containing nulls. This can cause an NPE in code generation, such as in the query `select coalesce(xpath(repeat('<a></a>', id), 'a')[0], '') from range(1, 2)`. Why are the changes needed? It avoids potential failures in queries that use the `xpath` expression. Does this PR introduce any user-facing change? No. How was this patch tested? A new unit test. It would fail without the change in this PR. Was this patch authored or co-authored using generative AI tooling? No. Closes apache#47796 from chenhao-db/fix_xpath_nullness. Authored-by: Chenhao Li <[email protected]> Signed-off-by: Max Gekk <[email protected]>
1 parent 38ad0e7 commit 79065ac

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,13 +239,16 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract {
239239
Examples:
240240
> SELECT _FUNC_('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()');
241241
["b1","b2","b3"]
242+
> SELECT _FUNC_('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b');
243+
[null,null,null]
242244
""",
243245
since = "2.0.0",
244246
group = "xml_funcs")
245247
// scalastyle:on line.size.limit
246248
case class XPathList(xml: Expression, path: Expression) extends XPathExtract {
247249
override def prettyName: String = "xpath"
248-
override def dataType: DataType = ArrayType(StringType, containsNull = false)
250+
251+
override def dataType: DataType = ArrayType(StringType)
249252

250253
override def nullSafeEval(xml: Any, path: Any): Any = {
251254
val nodeList = xpathUtil.evalNodeList(xml.asInstanceOf[UTF8String].toString, pathString)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/xml/XPathExpressionSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ class XPathExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
185185
testExpr("<a><b class='bb'>b1</b><b>b2</b><b>b3</b><c class='bb'>c1</c><c>c2</c></a>",
186186
"a/*[@class='bb']/text()", Seq("b1", "c1"))
187187

188+
checkEvaluation(
189+
Coalesce(Seq(
190+
GetArrayItem(XPathList(Literal("<a></a>"), Literal("a")), Literal(0)),
191+
Literal("nul"))), "nul")
192+
188193
testNullAndErrorBehavior(testExpr)
189194
}
190195

0 commit comments

Comments
 (0)