
Commit 070e12a

Support multiple columns in count(distinct c1, c2, ...) in Spark SQL
1 parent ceb6281

File tree (2 files changed: +8 −1)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala

Lines changed: 1 addition & 1 deletion
@@ -277,7 +277,7 @@ class SqlParser extends AbstractSparkSQLParser {
     | SUM ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => SumDistinct(exp) }
     | COUNT ~ "(" ~> "*" <~ ")" ^^ { case _ => Count(Literal(1)) }
     | COUNT ~ "(" ~> expression <~ ")" ^^ { case exp => Count(exp) }
-    | COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => CountDistinct(exp :: Nil) }
+    | COUNT ~> "(" ~> DISTINCT ~> repsep(expression, ",") <~ ")" ^^ { case exps => CountDistinct(exps) }
     | APPROXIMATE ~ COUNT ~ "(" ~ DISTINCT ~> expression <~ ")" ^^
       { case exp => ApproxCountDistinct(exp) }
     | APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ expression <~ ")" ^^
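
The functional change is a one-token swap: the single expression after DISTINCT becomes repsep(expression, ","), so one or more comma-separated expressions are collected into a list and handed to CountDistinct. As a rough illustration of what repsep does, here is a standalone toy parser, not Spark's SqlParser; the object name, the simplified expression rule, and the use of RegexParsers from the scala-parser-combinators library are assumptions made for this sketch:

import scala.util.parsing.combinator.RegexParsers

// Toy grammar: parses COUNT(DISTINCT c1, c2, ...) into a list of column names.
// Only meant to show how repsep collects comma-separated elements into a List.
object RepsepSketch extends RegexParsers {
  // Stand-in for Spark's expression rule: a bare identifier such as c1, c2, ...
  def expression: Parser[String] = """[A-Za-z_][A-Za-z0-9_]*""".r

  // Mirrors the shape of the new rule: COUNT ( DISTINCT expr [, expr ...] )
  def countDistinct: Parser[List[String]] =
    "COUNT" ~> "(" ~> "DISTINCT" ~> repsep(expression, ",") <~ ")"

  def main(args: Array[String]): Unit = {
    // A single column and several columns both parse into a List.
    println(parseAll(countDistinct, "COUNT(DISTINCT c1)"))         // parsed: List(c1)
    println(parseAll(countDistinct, "COUNT(DISTINCT c1, c2, c3)")) // parsed: List(c1, c2, c3)
  }
}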

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 7 additions & 0 deletions
@@ -992,4 +992,11 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
       "nulldata2 on nulldata1.value <=> nulldata2.value"),
       (1 to 2).map(i => Seq(i)))
   }
+
+  test("Supporting multi column support for count(distinct ..) function in Spark SQL") {
+    val data = TestData(1,"val_1") :: TestData(2,"val_2") :: Nil
+    val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
+    rdd.registerTempTable("distinctData")
+    checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), 2)
+  }
 }
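
For readers who want to try the new syntax outside the test suite, the sketch below is a hypothetical usage example and not part of this commit: it assumes a Spark 1.x SQLContext, the createSchemaRDD implicit of that era, and made-up names (Record, records). With three rows but only two distinct (key, value) pairs, the query should return 2:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Hypothetical case class used to give the rows a schema.
case class Record(key: Int, value: String)

object MultiColumnCountDistinctExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("count-distinct").setMaster("local"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.createSchemaRDD // Spark 1.x implicit: RDD of case classes -> SchemaRDD

    // Two distinct (key, value) pairs: (1, "a") and (2, "b").
    val rdd = sc.parallelize(Seq(Record(1, "a"), Record(1, "a"), Record(2, "b")))
    rdd.registerTempTable("records")

    // With the parser change, more than one column can follow DISTINCT;
    // the count is taken over distinct column tuples.
    sqlContext.sql("SELECT COUNT(DISTINCT key, value) FROM records").collect().foreach(println)
    // expected: a single row containing 2

    sc.stop()
  }
}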
