diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala
index 0fe62da0ded36..510fe2685de02 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
 import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, EqualNullSafe, EqualTo, Expression, ExtractValue, GetStructField, GreaterThan, GreaterThanOrEqual, In, InSet, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or, StartsWith, SubqueryExpression}
 import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.hudi.ColumnStatsExpressionUtils._
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.types.{StringType, StructType}
 import org.apache.spark.sql.{AnalysisException, HoodieCatalystExpressionUtils}
 import org.apache.spark.unsafe.types.UTF8String
 
@@ -234,7 +234,10 @@ object DataSkippingUtils extends Logging {
       getTargetIndexedColumnName(attrRef, indexSchema)
         .map { colName =>
           val targetExprBuilder: Expression => Expression = swapAttributeRefInExpr(sourceExpr, attrRef, _)
-          hset.map(value => genColumnValuesEqualToExpression(colName, Literal(value), targetExprBuilder)).reduce(Or)
+          hset.map(value => genColumnValuesEqualToExpression(colName, value match {
+            case s: UTF8String => Literal(s, StringType)
+            case _ => Literal(value)
+          }, targetExprBuilder)).reduce(Or)
         }
 
     // Filter "expr(colA) not in (B1, B2, ...)"
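
For context (not part of the patch): Catalyst stores string values inside an InSet's hset as UTF8String, and Spark's Literal.apply(Any) has no case for UTF8String, so the old Literal(value) call would fail with an unsupported-literal-type error for string columns. The standalone sketch below mirrors the match added in the second hunk; the helper and object names (toLiteral, InSetLiteralSketch) are hypothetical and introduced only for illustration.

import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String

object InSetLiteralSketch {
  // Hypothetical helper mirroring the match introduced by the patch:
  // UTF8String values keep their internal representation but are tagged
  // with StringType explicitly; everything else goes through Literal.apply.
  def toLiteral(value: Any): Literal = value match {
    case s: UTF8String => Literal(s, StringType)
    case _ => Literal(value)
  }

  def main(args: Array[String]): Unit = {
    // Values as they would appear in InSet.hset for a string column.
    println(toLiteral(UTF8String.fromString("2022-01-01")).dataType) // StringType
    println(toLiteral(42).dataType)                                  // IntegerType
    // Literal(UTF8String.fromString("x")) alone would throw, since
    // Literal.apply(Any) does not handle UTF8String.
  }
}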