Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion datafusion/functions-aggregate/src/approx_percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,24 @@ pub fn approx_percentile_cont(
+-----------------------------------------------------------------------+
| 65.0 |
+-----------------------------------------------------------------------+
```"#,
```
An alternate syntax is also supported:
```sql
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
+-----------------------------------------------+
| approx_percentile_cont(column_name, 0.75) |
+-----------------------------------------------+
| 65.0 |
+-----------------------------------------------+

> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
+----------------------------------------------------------+
| approx_percentile_cont(column_name, 0.75, 100) |
+----------------------------------------------------------+
| 65.0 |
+----------------------------------------------------------+
```
"#,
standard_argument(name = "expression",),
argument(
name = "percentile",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ pub fn approx_percentile_cont_with_weight(
+--------------------------------------------------------------------------------------------------+
| 78.5 |
+--------------------------------------------------------------------------------------------------+
```
An alternative syntax is also supported:

```sql
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
+----------------------------------------------------------------------+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
+----------------------------------------------------------------------+
| 78.5                                                                 |
+----------------------------------------------------------------------+
```"#,
standard_argument(name = "expression", prefix = "The"),
argument(
Expand Down
7 changes: 2 additions & 5 deletions datafusion/sql/src/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
} else {
// User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function
if let Some(fm) = self.context_provider.get_aggregate_meta(&name) {
if fm.is_ordered_set_aggregate() && within_group.is_empty() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There appear to be no tests for this error, nor can I see any reason to disallow calling an ordered-set aggregate function without a WITHIN GROUP clause.

return plan_err!("WITHIN GROUP clause is required when calling ordered set aggregate function({})", fm.name());
}

if null_treatment.is_some() && !fm.supports_null_handling_clause() {
return plan_err!(
"[IGNORE | RESPECT] NULLS are not permitted for {}",
Expand All @@ -415,7 +411,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
None,
)?;

// add target column expression in within group clause to function arguments
// Add the WITHIN GROUP ordering expressions to the front of the argument list
// So function(arg) WITHIN GROUP (ORDER BY x) becomes function(x, arg)
if !within_group.is_empty() {
args = within_group
.iter()
Expand Down
41 changes: 40 additions & 1 deletion datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,7 @@ SELECT approx_distinct(c9) AS a, approx_distinct(c9) AS b FROM aggregate_test_10
## Column `c12` is omitted due to a large relative error (~10%) due to the small
## float values.

#csv_query_approx_percentile_cont (c2)
# csv_query_approx_percentile_cont (c2)
query B
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c2) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
----
Expand All @@ -1353,6 +1353,23 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY c2) AS D
----
true


# csv_query_approx_percentile_cont (c2, alternate syntax, should be the same as above)
query B
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.1) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now both the new and old queries work

----
true

query B
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.5) AS DOUBLE) / 3.0) < 0.05) AS q FROM aggregate_test_100
----
true

query B
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.9) AS DOUBLE) / 5.0) < 0.05) AS q FROM aggregate_test_100
----
true

# csv_query_approx_percentile_cont (c3)
query B
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c3) AS DOUBLE) / -95.3) < 0.05) AS q FROM aggregate_test_100
Expand Down Expand Up @@ -1793,6 +1810,17 @@ c 122
d 124
e 115


# csv_query_approx_percentile_cont alternate syntax (should be the same as above)
query TI
SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
----
a 73
b 68
c 122
d 124
e 115

query TI
SELECT c1, approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
----
Expand All @@ -1812,6 +1840,17 @@ c 122
d 124
e 115

# csv_query_approx_percentile_cont_with_weight alternate syntax
query TI
SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
----
a 73
b 68
c 122
d 124
e 115


query TI
SELECT c1, approx_percentile_cont_with_weight(1, 0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
----
Expand Down
29 changes: 29 additions & 0 deletions docs/source/user-guide/sql/aggregate_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,24 @@ approx_percentile_cont(percentile [, centroids]) WITHIN GROUP (ORDER BY expressi
+-----------------------------------------------------------------------+
```

An alternate syntax is also supported:

```sql
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
+-----------------------------------------------+
| approx_percentile_cont(column_name, 0.75) |
+-----------------------------------------------+
| 65.0 |
+-----------------------------------------------+

> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
+----------------------------------------------------------+
| approx_percentile_cont(column_name, 0.75, 100) |
+----------------------------------------------------------+
| 65.0 |
+----------------------------------------------------------+
```

### `approx_percentile_cont_with_weight`

Returns the weighted approximate percentile of input values using the t-digest algorithm.
Expand Down Expand Up @@ -1096,3 +1114,14 @@ approx_percentile_cont_with_weight(weight, percentile [, centroids]) WITHIN GROU
| 78.5 |
+--------------------------------------------------------------------------------------------------+
```

An alternative syntax is also supported:

```sql
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
+----------------------------------------------------------------------+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
+----------------------------------------------------------------------+
| 78.5                                                                 |
+----------------------------------------------------------------------+
```
Loading