Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4a6f903
Reuse completeNextStageWithFetchFailure
beliefer Jun 19, 2020
96456e2
Merge remote-tracking branch 'upstream/master'
beliefer Jul 1, 2020
4314005
Merge remote-tracking branch 'upstream/master'
beliefer Jul 3, 2020
d6af4a7
Merge remote-tracking branch 'upstream/master'
beliefer Jul 9, 2020
f69094f
Merge remote-tracking branch 'upstream/master'
beliefer Jul 16, 2020
b86a42d
Merge remote-tracking branch 'upstream/master'
beliefer Jul 25, 2020
2ac5159
Merge branch 'master' of github.com:beliefer/spark
beliefer Jul 25, 2020
9021d6c
Merge remote-tracking branch 'upstream/master'
beliefer Jul 28, 2020
74a2ef4
Merge branch 'master' of github.com:beliefer/spark
beliefer Jul 28, 2020
9828158
Merge remote-tracking branch 'upstream/master'
beliefer Jul 31, 2020
9cd1aaf
Merge remote-tracking branch 'upstream/master'
beliefer Aug 5, 2020
abfcbb9
Merge remote-tracking branch 'upstream/master'
beliefer Aug 26, 2020
07c6c81
Merge remote-tracking branch 'upstream/master'
beliefer Sep 1, 2020
0f355ad
Support multiple foldable distinct expressions.
beliefer Sep 2, 2020
580130b
Merge remote-tracking branch 'upstream/master'
beliefer Sep 2, 2020
e15519e
Merge branch 'master' into support-multiple-foldable-distinct-express…
beliefer Sep 2, 2020
9ad3469
Update test case.
beliefer Sep 2, 2020
81eb9e4
Fix bug.
beliefer Sep 2, 2020
52896fb
Update golden files.
beliefer Sep 3, 2020
8622411
Fix bug
beliefer Sep 4, 2020
43cb6a0
Optimize code
beliefer Sep 4, 2020
64834f8
Optimize code
beliefer Sep 7, 2020
fa7d578
Optimize code
beliefer Sep 7, 2020
5f14d54
Optimize code.
beliefer Sep 9, 2020
9a44bec
Optimize code.
beliefer Sep 9, 2020
16588f5
Add comments.
beliefer Sep 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,17 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
// Final aggregate
val operators = expressions.map { e =>
val af = e.aggregateFunction
val naf = patchAggregateFunctionChildren(af) { x =>
val condition = if (e.filter.isDefined) {
e.filter.map(distinctAggFilterAttrLookup.get(_)).get
} else {
None
// Look up the aggregate's FILTER expression (if it has one) in distinctAggFilterAttrLookup.
// The result is None when there is no filter or when the lookup has no entry for it.
val condition = e.filter.flatMap(distinctAggFilterAttrLookup.get(_))
val naf = if (af.children.forall(_.foldable)) {
// If aggregateFunction's children are all foldable, we only put the first child in
// distinctAggGroups. So here we only need to rewrite the first child to
// `if (gid = ...) ...` or `if (gid = ... and condition) ...`.
val firstChild = evalWithinGroup(id, af.children.head, condition)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some comments to explain why we are doing this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

af.withNewChildren(firstChild +: af.children.drop(1)).asInstanceOf[AggregateFunction]
} else {
patchAggregateFunctionChildren(af) { x =>
distinctAggChildAttrLookup.get(x).map(evalWithinGroup(id, _, condition))
}
distinctAggChildAttrLookup.get(x).map(evalWithinGroup(id, _, condition))
}
(e, e.copy(aggregateFunction = naf, isDistinct = false, filter = None))
}
Expand Down
10 changes: 10 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/count.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,13 @@ SELECT count(a, b), count(b, a), count(testData.*) FROM testData;
SELECT
count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*)
FROM testData;

-- distinct count with multiple literals
-- Every argument of the literal-only counts below is a constant. The two-literal list is
-- exercised on its own and next to other DISTINCT counts (as both 2,3 and 3,2), with and
-- without a FROM clause.
SELECT count(DISTINCT 3,2);
SELECT count(DISTINCT 2), count(DISTINCT 2,3);
SELECT count(DISTINCT 2), count(DISTINCT 3,2);
SELECT count(DISTINCT a), count(DISTINCT 2,3) FROM testData;
SELECT count(DISTINCT a), count(DISTINCT 3,2) FROM testData;
SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData;
SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData;
-- A literal-only DISTINCT count next to a DISTINCT aggregate whose argument list mixes a
-- column with a literal.
SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData;
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ SELECT SUM(salary), COUNT(DISTINCT id), COUNT(DISTINCT id) FILTER (WHERE hiredat
SELECT COUNT(DISTINCT 1) FILTER (WHERE a = 1) FROM testData;
SELECT COUNT(DISTINCT id) FILTER (WHERE true) FROM emp;
SELECT COUNT(DISTINCT id) FILTER (WHERE false) FROM emp;
-- Literal-only DISTINCT counts (two literals, in both orders) carrying a FILTER clause,
-- paired first with an unfiltered literal-only DISTINCT count and then with a DISTINCT count
-- on a column. Each pairing is run with the predicates dept_id = 40 and dept_id > 0.
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp;
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp;
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp;
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp;
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp;
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp;
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp;
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp;

-- Aggregate with filter and non-empty GroupBy expressions.
SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a;
Expand Down
66 changes: 65 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/count.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 5
-- Number of queries: 13


-- !query
Expand Down Expand Up @@ -53,3 +53,67 @@ FROM testData
struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint>
-- !query output
3 3 3 3


-- !query
SELECT count(DISTINCT 3,2)
-- !query schema
struct<count(DISTINCT 3, 2):bigint>
-- !query output
1


-- !query
SELECT count(DISTINCT 2), count(DISTINCT 2,3)
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 2, 3):bigint>
-- !query output
1 1


-- !query
SELECT count(DISTINCT 2), count(DISTINCT 3,2)
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 3, 2):bigint>
-- !query output
1 1


-- !query
SELECT count(DISTINCT a), count(DISTINCT 2,3) FROM testData
-- !query schema
struct<count(DISTINCT a):bigint,count(DISTINCT 2, 3):bigint>
-- !query output
2 1


-- !query
SELECT count(DISTINCT a), count(DISTINCT 3,2) FROM testData
-- !query schema
struct<count(DISTINCT a):bigint,count(DISTINCT 3, 2):bigint>
-- !query output
2 1


-- !query
SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData
-- !query schema
struct<count(DISTINCT a):bigint,count(DISTINCT 2):bigint,count(DISTINCT 2, 3):bigint>
-- !query output
2 1 1


-- !query
SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData
-- !query schema
struct<count(DISTINCT a):bigint,count(DISTINCT 2):bigint,count(DISTINCT 3, 2):bigint>
-- !query output
2 1 1


-- !query
SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData
-- !query schema
struct<count(DISTINCT 0.8):bigint,percentile_approx(DISTINCT a, CAST(0.8 AS DOUBLE), 10000):int>
-- !query output
1 2
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 68
-- Number of queries: 76


-- !query
Expand Down Expand Up @@ -150,6 +150,70 @@ struct<count(DISTINCT id) FILTER (WHERE false):bigint>
0


-- !query
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 2, 3) FILTER (WHERE (dept_id = 40)):bigint>
-- !query output
1 0


-- !query
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 3, 2) FILTER (WHERE (dept_id = 40)):bigint>
-- !query output
1 0


-- !query
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 2, 3) FILTER (WHERE (dept_id > 0)):bigint>
-- !query output
1 1


-- !query
SELECT COUNT(DISTINCT 2), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp
-- !query schema
struct<count(DISTINCT 2):bigint,count(DISTINCT 3, 2) FILTER (WHERE (dept_id > 0)):bigint>
-- !query output
1 1


-- !query
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id = 40) FROM emp
-- !query schema
struct<count(DISTINCT id):bigint,count(DISTINCT 2, 3) FILTER (WHERE (dept_id = 40)):bigint>
-- !query output
8 0


-- !query
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id = 40) FROM emp
-- !query schema
struct<count(DISTINCT id):bigint,count(DISTINCT 3, 2) FILTER (WHERE (dept_id = 40)):bigint>
-- !query output
8 0


-- !query
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 2,3) FILTER (WHERE dept_id > 0) FROM emp
-- !query schema
struct<count(DISTINCT id):bigint,count(DISTINCT 2, 3) FILTER (WHERE (dept_id > 0)):bigint>
-- !query output
8 1


-- !query
SELECT COUNT(DISTINCT id), COUNT(DISTINCT 3,2) FILTER (WHERE dept_id > 0) FROM emp
-- !query schema
struct<count(DISTINCT id):bigint,count(DISTINCT 3, 2) FILTER (WHERE (dept_id > 0)):bigint>
-- !query output
8 1


-- !query
SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a
-- !query schema
Expand Down