diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
index afaa2fb59c39b..52429a63b306d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
@@ -282,6 +282,7 @@ abstract class HashExpression[E] extends Expression {
     }
 
     val hashResultType = CodeGenerator.javaType(dataType)
+    val typedSeed = if (dataType.sameType(LongType)) s"${seed}L" else s"$seed"
     val codes = ctx.splitExpressionsWithCurrentInputs(
       expressions = childrenHash,
       funcName = "computeHash",
@@ -296,7 +297,7 @@ abstract class HashExpression[E] extends Expression {
 
     ev.copy(code =
       code"""
-         |$hashResultType ${ev.value} = $seed;
+         |$hashResultType ${ev.value} = $typedSeed;
          |$codes
        """.stripMargin)
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index 7dc072722352f..e6cf979649c83 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -684,6 +684,33 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     assert(murmur3HashPlan(wideRow).getInt(0) == murmursHashEval)
   }
 
+  test("SPARK-30633: xxHash with different type seeds") {
+    val literal = Literal.create(42L, LongType)
+
+    val longSeeds = Seq(
+      Long.MinValue,
+      Integer.MIN_VALUE.toLong - 1L,
+      0L,
+      Integer.MAX_VALUE.toLong + 1L,
+      Long.MaxValue
+    )
+    for (seed <- longSeeds) {
+      checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
+    }
+
+    val intSeeds = Seq(
+      Integer.MIN_VALUE,
+      0,
+      Integer.MAX_VALUE
+    )
+    for (seed <- intSeeds) {
+      checkEvaluation(XxHash64(Seq(literal), seed), XxHash64(Seq(literal), seed).eval())
+    }
+
+    checkEvaluation(XxHash64(Seq(literal), 100), XxHash64(Seq(literal), 100L).eval())
+    checkEvaluation(XxHash64(Seq(literal), 100L), XxHash64(Seq(literal), 100).eval())
+  }
+
   private def testHash(inputSchema: StructType): Unit = {
     val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
     val encoder = RowEncoder(inputSchema)
@@ -700,5 +727,19 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
         checkEvaluation(HiveHash(literals), HiveHash(literals).eval())
       }
     }
+
+    // Widen `seed` before taking the absolute value: for an Int seed, Math.abs(Int.MinValue)
+    // overflows and stays negative, which would drop longSeed below Int.MaxValue.
+    val longSeed = Math.abs(seed.toLong) + Integer.MAX_VALUE.toLong
+    test(s"SPARK-30633: xxHash64 with long seed: ${inputSchema.simpleString}") {
+      for (_ <- 1 to 10) {
+        val input = encoder.toRow(inputGenerator.apply().asInstanceOf[Row]).asInstanceOf[UnsafeRow]
+        val literals = input.toSeq(inputSchema).zip(inputSchema.map(_.dataType)).map {
+          case (value, dt) => Literal.create(value, dt)
+        }
+        // Only check that the interpreted version has the same result as the codegen version.
+        checkEvaluation(XxHash64(literals, longSeed), XxHash64(literals, longSeed).eval())
+      }
+    }
   }
 }