Skip to content

Commit

Permalink
feat(pyspark): implement count distinct
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Mar 27, 2023
1 parent aea4ccd commit db29e10
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
5 changes: 5 additions & 0 deletions ibis/backends/pyspark/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,11 @@ def compile_count(t, op, **kwargs):
return compile_aggregator(t, op, fn=F.count, **kwargs)


@compiles(ops.CountDistinct)
def compile_count_distinct(t, op, **kwargs):
    """Compile an ``ops.CountDistinct`` node.

    The work is delegated entirely to ``compile_aggregator``, which is
    given ``F.count_distinct`` as the aggregation function. Any extra
    keyword arguments are forwarded unchanged.
    """
    distinct_count = F.count_distinct
    return compile_aggregator(t, op, fn=distinct_count, **kwargs)


@compiles(ops.CountStar)
def compile_count_star(t, op, aggcontext=None, **kwargs):
src_table = t.translate(op.arg, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def mean_and_std(v):
lambda t, where: t.bool_col[where].dropna().nunique(),
id='nunique',
marks=pytest.mark.notimpl(
["pyspark", "datafusion"], raises=com.OperationNotDefinedError
["datafusion"], raises=com.OperationNotDefinedError
),
),
param(
Expand Down

0 comments on commit db29e10

Please sign in to comment.