Skip to content

Commit 33ef010

Browse files
yifhuamanuzhang
authored and committed
[HADP-52545][HADP-45102] Add config not to pad decimal with trailing zeros (apache#243)
* [HADP-45102] Add config not to pad decimal with trailing zeros for compatibility with 2.3.1 (apache#101)

### What changes were proposed in this pull request?
Add config `spark.sql.legacy.decimal.padTrailingZeros` so that padding decimals with trailing zeros can be disabled for the spark-sql interface.

### Why are the changes needed?
In 2.3.1, decimals were not padded with trailing zeros; that behavior was changed in apache#26697. This config keeps compatibility with 2.3.1.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing UT updated.

Co-authored-by: tianlzhang <[email protected]>
1 parent f85fc46 commit 33ef010

File tree

3 files changed

+48
-12
lines changed

3 files changed

+48
-12
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3255,6 +3255,14 @@ object SQLConf {
32553255
.booleanConf
32563256
.createWithDefault(true)
32573257

3258+
val DECIMAL_PAD_TRAILING_ZEROS =
3259+
buildConf("spark.sql.legacy.decimal.padTrailingZeros")
3260+
.internal()
3261+
.doc("Whether to pad decimal numbers with trailing zeros to the scale of the column for " +
3262+
"spark-sql interface")
3263+
.booleanConf
3264+
.createWithDefault(true)
3265+
32583266
val SQL_OPTIONS_REDACTION_PATTERN = buildConf("spark.sql.redaction.options.regex")
32593267
.doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
32603268
"information. The values of options whose names that match this regex will be redacted " +
@@ -5740,6 +5748,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
57405748

57415749
def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)
57425750

5751+
def decimalPadTrailingZeros: Boolean = getConf(DECIMAL_PAD_TRAILING_ZEROS)
5752+
57435753
def continuousStreamingEpochBacklogQueueSize: Int =
57445754
getConf(CONTINUOUS_STREAMING_EPOCH_BACKLOG_QUEUE_SIZE)
57455755

sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,14 @@ object HiveResult {
8181
.map(_.mkString("\t"))
8282
}
8383

84+
private def formatDecimal(d: java.math.BigDecimal): String = {
85+
if (d.compareTo(java.math.BigDecimal.ZERO) == 0) {
86+
java.math.BigDecimal.ZERO.toPlainString
87+
} else {
88+
d.stripTrailingZeros().toPlainString // Hive strips trailing zeros
89+
}
90+
}
91+
8492
private def formatDescribeTableOutput(rows: Array[Row]): Seq[String] = {
8593
rows.map {
8694
case Row(name: String, dataType: String, comment) =>
@@ -103,7 +111,13 @@ object HiveResult {
103111
case (i: Instant, TimestampType) => formatters.timestamp.format(i)
104112
case (l: LocalDateTime, TimestampNTZType) => formatters.timestamp.format(l)
105113
case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8)
106-
case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString
114+
case (decimal: java.math.BigDecimal, DecimalType()) =>
115+
// HADP-45102: to optionally keep compatibility with 2.3.1 where trailing zeros are stripped
116+
if (SQLConf.get.decimalPadTrailingZeros) {
117+
decimal.toPlainString
118+
} else {
119+
formatDecimal(decimal)
120+
}
107121
case (n, _: NumericType) => n.toString
108122
case (s: String, StringType) => if (nested) "\"" + s + "\"" else s
109123
case (interval: CalendarInterval, CalendarIntervalType) => interval.toString

sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -65,18 +65,30 @@ class HiveResultSuite extends SharedSparkSession {
6565
}
6666

6767
test("decimal formatting in hive result") {
68-
val df = Seq(new java.math.BigDecimal("1")).toDS()
69-
Seq(2, 6, 18).foreach { scala =>
70-
val executedPlan =
71-
df.selectExpr(s"CAST(value AS decimal(38, $scala))").queryExecution.executedPlan
72-
val result = hiveResultString(executedPlan)
73-
assert(result.head.split("\\.").last.length === scala)
74-
}
68+
Seq("true", "false").foreach { padTrailingZeros =>
69+
withSQLConf(SQLConf.DECIMAL_PAD_TRAILING_ZEROS.key -> padTrailingZeros) {
70+
val df = Seq(new java.math.BigDecimal("1")).toDS()
71+
Seq(2, 6, 18).foreach { scale =>
72+
val executedPlan =
73+
df.selectExpr(s"CAST(value AS decimal(38, $scale))").queryExecution.executedPlan
74+
val result = hiveResultString(executedPlan)
75+
if (padTrailingZeros.toBoolean) {
76+
assert(result.head.split("\\.").last.length === scale)
77+
} else {
78+
assert(result.head.split("\\.").length == 1)
79+
}
80+
}
7581

76-
val executedPlan = Seq(java.math.BigDecimal.ZERO).toDS()
77-
.selectExpr(s"CAST(value AS decimal(38, 8))").queryExecution.executedPlan
78-
val result = hiveResultString(executedPlan)
79-
assert(result.head === "0.00000000")
82+
val executedPlan = Seq(java.math.BigDecimal.ZERO).toDS()
83+
.selectExpr(s"CAST(value AS decimal(38, 8))").queryExecution.executedPlan
84+
val result = hiveResultString(executedPlan)
85+
if (padTrailingZeros.toBoolean) {
86+
assert(result.head === "0.00000000")
87+
} else {
88+
assert(result.head === "0")
89+
}
90+
}
91+
}
8092
}
8193

8294
test("SHOW TABLES in hive result") {

0 commit comments

Comments
 (0)