diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 7827a0fe7d904..5042c25884289 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -168,6 +168,8 @@ url: sql-ref-syntax-qry-select-inline-table.html - text: Common Table Expression url: sql-ref-syntax-qry-select-cte.html + - text: Window Function + url: sql-ref-syntax-qry-window.html - text: EXPLAIN url: sql-ref-syntax-qry-explain.html - text: Auxiliary Statements diff --git a/docs/sql-ref-syntax-qry-window.md b/docs/sql-ref-syntax-qry-window.md index 767f47715e89c..4ec1af70f6079 100644 --- a/docs/sql-ref-syntax-qry-window.md +++ b/docs/sql-ref-syntax-qry-window.md @@ -1,7 +1,7 @@ --- layout: global -title: Windowing Analytic Functions -displayTitle: Windowing Analytic Functions +title: Window Functions +displayTitle: Window Functions license: | Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -19,4 +19,188 @@ license: | limitations under the License. --- -**This page is under construction** +### Description + +Window functions operate on a group of rows, referred to as a window, and calculate a return value for each row based on the group of rows. Window functions are useful for processing tasks such as calculating a moving average, computing a cumulative statistic, or accessing the value of rows given the relative position of the current row. + +### Syntax + +{% highlight sql %} +window_function OVER +( [ { PARTITION | DISTRIBUTE } BY partition_col_name = partition_col_val ( [ , ... ] ) ] + { ORDER | SORT } BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [ , ... ] + [ window_frame ] ) +{% endhighlight %} + +### Parameters + +
+
window_function
+
+ + + +
+
+
+
window_frame
+
+ Specifies which row to start the window on and where to end it.
+ Syntax:
+ { RANGE | ROWS } { frame_start | BETWEEN frame_start AND frame_end }
+ If frame_end is omitted it defaults to CURRENT ROW.

+ +
+
+ +### Examples + +{% highlight sql %} +CREATE TABLE employees (name STRING, dept STRING, salary INT, age INT); + +INSERT INTO employees VALUES ("Lisa", "Sales", 10000, 35); +INSERT INTO employees VALUES ("Evan", "Sales", 32000, 38); +INSERT INTO employees VALUES ("Fred", "Engineering", 21000, 28); +INSERT INTO employees VALUES ("Alex", "Sales", 30000, 33); +INSERT INTO employees VALUES ("Tom", "Engineering", 23000, 33); +INSERT INTO employees VALUES ("Jane", "Marketing", 29000, 28); +INSERT INTO employees VALUES ("Jeff", "Marketing", 35000, 38); +INSERT INTO employees VALUES ("Paul", "Engineering", 29000, 23); +INSERT INTO employees VALUES ("Chloe", "Engineering", 23000, 25); + +SELECT * FROM employees; + +-----+-----------+------+-----+ + | name| dept|salary| age| + +-----+-----------+------+-----+ + |Chloe|Engineering| 23000| 25| + | Fred|Engineering| 21000| 28| + | Paul|Engineering| 29000| 23| + |Helen| Marketing| 29000| 40| + | Tom|Engineering| 23000| 33| + | Jane| Marketing| 29000| 28| + | Jeff| Marketing| 35000| 38| + | Evan| Sales| 32000| 38| + | Lisa| Sales| 10000| 35| + | Alex| Sales| 30000| 33| + +-----+-----------+------+-----+ + +SELECT name, dept, RANK() OVER (PARTITION BY dept ORDER BY salary) AS rank FROM employees; + +-----+-----------+------+----+ + | name| dept|salary|rank| + +-----+-----------+------+----+ + | Lisa| Sales| 10000| 1| + | Alex| Sales| 30000| 2| + | Evan| Sales| 32000| 3| + | Fred|Engineering| 21000| 1| + | Tom|Engineering| 23000| 2| + |Chloe|Engineering| 23000| 2| + | Paul|Engineering| 29000| 4| + |Helen| Marketing| 29000| 1| + | Jane| Marketing| 29000| 1| + | Jeff| Marketing| 35000| 3| + +-----+-----------+------+----+ + +SELECT name, dept, DENSE_RANK() OVER (PARTITION BY dept ORDER BY salary ROWS BETWEEN + UNBOUNDED PRECEDING AND CURRENT ROW) AS dense_rank FROM employees; + +-----+-----------+------+----------+ + | name| dept|salary|dense_rank| + +-----+-----------+------+----------+ + | Lisa| Sales| 10000| 1| + | Alex| Sales| 30000| 2| + | Evan| Sales| 32000| 3| + | Fred|Engineering| 21000| 1| + | Tom|Engineering| 23000| 2| + |Chloe|Engineering| 23000| 2| + | Paul|Engineering| 29000| 3| + |Helen| Marketing| 29000| 1| + | Jane| Marketing| 29000| 1| + | Jeff| Marketing| 35000| 2| + +-----+-----------+------+----------+ + +SELECT name, dept, age, CUME_DIST() OVER (PARTITION BY dept ORDER BY age + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cume_dist FROM employees; + +-----+-----------+------+------------------+ + | name| dept|age | cume_dist| + +-----+-----------+------+------------------+ + | Alex| Sales| 33|0.3333333333333333| + | Lisa| Sales| 35|0.6666666666666666| + | Evan| Sales| 38| 1.0| + | Paul|Engineering| 23| 0.25| + |Chloe|Engineering| 25| 0.75| + | Fred|Engineering| 28| 0.25| + | Tom|Engineering| 33| 1.0| + | Jane| Marketing| 28|0.3333333333333333| + | Jeff| Marketing| 38|0.6666666666666666| + |Helen| Marketing| 40| 1.0| + +-----+-----------+------+------------------+ + +SELECT name, dept, salary, MIN(salary) OVER (PARTITION BY dept ORDER BY salary) AS min + FROM employees; + +-----+-----------+------+-----+ + | name| dept|salary| min| + +-----+-----------+------+-----+ + | Lisa| Sales| 10000|10000| + | Alex| Sales| 30000|10000| + | Evan| Sales| 32000|10000| + |Helen| Marketing| 29000|29000| + | Jane| Marketing| 29000|29000| + | Jeff| Marketing| 35000|29000| + | Fred|Engineering| 21000|21000| + | Tom|Engineering| 23000|21000| + |Chloe|Engineering| 23000|21000| + | Paul|Engineering| 29000|21000| + +-----+-----------+------+-----+ + +SELECT name, salary, + LAG(salary) OVER (PARTITION BY dept ORDER BY salary) AS lag, + LEAD(salary, 1, 0) OVER (PARTITION BY dept ORDER BY salary) AS lead + FROM employees; + +-----+-----------+------+-----+-----+ + | name| dept|salary| lag| lead| + +-----+-----------+------+-----+-----+ + | Lisa| Sales| 10000|NULL |30000| + | Alex| Sales| 30000|10000|32000| + | Evan| Sales| 32000|30000| 0| + | Fred|Engineering| 21000| NULL|23000| + |Chloe|Engineering| 23000|21000|23000| + | Tom|Engineering| 23000|23000|29000| + | Paul|Engineering| 29000|23000| 0| + |Helen| Marketing| 29000| NULL|29000| + | Jane| Marketing| 29000|29000|35000| + | Jeff| Marketing| 35000|29000| 0| + +-----+-----------+------+-----+-----+ +{% endhighlight %} + +### Related Statements + + * [SELECT](sql-ref-syntax-qry-select.html)