From 88d8188219aa3f79477a09e7b9d5d4aa5689f6f6 Mon Sep 17 00:00:00 2001 From: Claus Herther Date: Mon, 12 Oct 2020 16:43:34 -0600 Subject: [PATCH 1/7] Add argument to allow for alternate sorting of column values --- macros/sql/get_column_values.sql | 34 +++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index cf2277e1..d08869d1 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -10,7 +10,7 @@ Returns: A list of distinct values for the specified columns #} -{% macro get_column_values(table, column, max_records=none, default=none) -%} +{% macro get_column_values(table, column, sort_column=none, sort_direction=none, max_records=none, default=none) -%} {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} {%- if not execute -%} @@ -22,6 +22,7 @@ Returns: schema=table.schema, identifier=table.identifier) -%} + {# If no sort column is supplied, we use the default descending frequency count. 
#} {%- call statement('get_column_values', fetch_result=true) %} {%- if not target_relation and default is none -%} @@ -36,16 +37,27 @@ Returns: {%- else -%} - select - {{ column }} as value - - from {{ target_relation }} - group by 1 - order by count(*) desc - - {% if max_records is not none %} - limit {{ max_records }} - {% endif %} + with sorted_column_values as ( + + select + {{ column }} as value, + {%- if sort_column is none %} + count(*) as sort_column + {% else %} + {# We take the max sort value for each value to make sure + there are no duplicate rows for each value #} + max({{ sort_column }}) as sort_column + {% endif %} + from {{ target_relation }} + group by 1 + order by 2 {{ sort_direction if sort_direction else "desc" }} + {% endif %} + {% if max_records is not none %} + limit {{ max_records }} + {% endif %} + + ) + select value from sorted_column_values {% endif %} From cd816cf68a78f2f2062e3782cfcd7035b581480d Mon Sep 17 00:00:00 2001 From: Claus Herther Date: Mon, 12 Oct 2020 17:13:07 -0600 Subject: [PATCH 2/7] Update README and CHANGELOG --- CHANGELOG.md | 5 +++++ README.md | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9148587..0f558758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# dbt-utils v0.6.3 + +## Features +- Adds ability to specify a `sort_column` and `sort_direction` in `get_column_values`. + # dbt-utils v0.6.2 ## Fixes diff --git a/README.md b/README.md index a31cb5a6..2e03fdbc 100644 --- a/README.md +++ b/README.md @@ -442,6 +442,8 @@ group by 1 This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation). It takes an options `default` argument for compiling when the relation does not already exist. +The `sort_column` argument allows for sorting of values. The default is highest to lowest frequency of values. You can also specify a `sort_direction`. 
+ Usage: ``` -- Returns a list of the top 50 states in the `users` table @@ -453,6 +455,29 @@ Usage: ... ``` + +``` +-- Returns a list of user names sorted by name from the `users` table +{% set names = dbt_utils.get_column_values(table=ref('users'), column='name', sort_column='name', default=[]) %} + +{% for name in names %} + ... +{% endfor %} + +... +``` + +``` +-- Returns a list of user cities sorted by name from the `users` table +{% set cities = dbt_utils.get_column_values(table=ref('users'), column='city_name', sort_column='city_name', default=[]) %} + +{% for city in cities %} + ... +{% endfor %} + +... +``` + #### get_relations_by_prefix Returns a list of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) that match a given prefix, with an optional exclusion pattern. It's particularly From 651322c3ebfb04930037138f960e31682a26011e Mon Sep 17 00:00:00 2001 From: clausherther Date: Tue, 22 Dec 2020 09:43:13 -0700 Subject: [PATCH 3/7] Remove extra endif --- macros/sql/get_column_values.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index d08869d1..fda6bc98 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -51,7 +51,6 @@ Returns: from {{ target_relation }} group by 1 order by 2 {{ sort_direction if sort_direction else "desc" }} - {% endif %} {% if max_records is not none %} limit {{ max_records }} {% endif %} From 20166fc1e1a9d7c36c17cd1b8c8fc91de15ab35a Mon Sep 17 00:00:00 2001 From: clausherther Date: Wed, 23 Dec 2020 17:09:05 -0700 Subject: [PATCH 4/7] Allow for more flexible order_by parameter --- macros/sql/get_column_values.sql | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index fda6bc98..37133ffc 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -10,7 +10,7 @@ Returns: A list 
of distinct values for the specified columns #} -{% macro get_column_values(table, column, sort_column=none, sort_direction=none, max_records=none, default=none) -%} +{% macro get_column_values(table, column, max_records=none, default=none, order_by=none, sort_direction=none) -%} {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} {%- if not execute -%} @@ -18,6 +18,8 @@ Returns: {% endif %} {#-- #} + {% set order_by = kwargs.get('order_by', 'count(*)') + {%- set target_relation = adapter.get_relation(database=table.database, schema=table.schema, identifier=table.identifier) -%} @@ -50,8 +52,9 @@ Returns: {% endif %} from {{ target_relation }} group by 1 - order by 2 {{ sort_direction if sort_direction else "desc" }} + order by {{ order_by }} {{ sort_direction or "desc" }} {% if max_records is not none %} + limit {{ max_records }} {% endif %} From 39ea5a6102c3d8c40ca6286a1898b84e047d6fd4 Mon Sep 17 00:00:00 2001 From: clausherther Date: Wed, 23 Dec 2020 17:13:15 -0700 Subject: [PATCH 5/7] Fix missing end parentheses --- macros/sql/get_column_values.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 37133ffc..5da4b2f2 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -18,7 +18,7 @@ Returns: {% endif %} {#-- #} - {% set order_by = kwargs.get('order_by', 'count(*)') + {%- set order_by = kwargs.get('order_by', 'count(*)') -%} {%- set target_relation = adapter.get_relation(database=table.database, schema=table.schema, From dd25e2503c33d7512f8c1a11336645b9c350cb7c Mon Sep 17 00:00:00 2001 From: clausherther Date: Wed, 23 Dec 2020 17:16:05 -0700 Subject: [PATCH 6/7] Refactor sort_column --- macros/sql/get_column_values.sql | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 5da4b2f2..4897f69b 
100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -43,13 +43,7 @@ Returns: select {{ column }} as value, - {%- if sort_column is none %} - count(*) as sort_column - {% else %} - {# We take the max sort value for each value to make sure - there are no duplicate rows for each value #} - max({{ sort_column }}) as sort_column - {% endif %} + {{ order_by }} as sort_column from {{ target_relation }} group by 1 order by {{ order_by }} {{ sort_direction or "desc" }} From d9b06fc997df3742e3df4cde68ee297897872008 Mon Sep 17 00:00:00 2001 From: clausherther Date: Wed, 23 Dec 2020 19:50:40 -0700 Subject: [PATCH 7/7] Make order_by work for column alpha sorts --- README.md | 18 +++++++++++++++--- .../models/sql/test_get_column_values.sql | 2 +- macros/sql/get_column_values.sql | 12 ++++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2e03fdbc..de4d328c 100644 --- a/README.md +++ b/README.md @@ -442,7 +442,7 @@ group by 1 This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation). It takes an options `default` argument for compiling when the relation does not already exist. -The `sort_column` argument allows for sorting of values. The default is highest to lowest frequency of values. You can also specify a `sort_direction`. +The `order_by` argument allows for sorting of values. The default is highest to lowest frequency of values. You can also specify a `sort_direction`. Usage: ``` @@ -458,7 +458,7 @@ Usage: ``` -- Returns a list of user names sorted by name from the `users` table -{% set names = dbt_utils.get_column_values(table=ref('users'), column='name', sort_column='name', default=[]) %} +{% set names = dbt_utils.get_column_values(table=ref('users'), column='name', default=[], order_by='name') %} {% for name in names %} ... 
@@ -469,7 +469,19 @@ Usage: ``` -- Returns a list of user cities sorted by name from the `users` table -{% set cities = dbt_utils.get_column_values(table=ref('users'), column='city_name', sort_column='city_name', default=[]) %} +{% set cities = dbt_utils.get_column_values(table=ref('users'), column='city_name', default=[], order_by='city_name') %} + +{% for city in cities %} + ... +{% endfor %} + +... +``` + + +``` +-- Returns a list of user cities sorted by their most recent `created_at` value from the `users` table +{% set cities = dbt_utils.get_column_values(table=ref('users'), column='city_name', default=[], order_by='max(created_at)') %} {% for city in cities %} ... diff --git a/integration_tests/models/sql/test_get_column_values.sql b/integration_tests/models/sql/test_get_column_values.sql index bfe5c486..f4095f82 100644 --- a/integration_tests/models/sql/test_get_column_values.sql +++ b/integration_tests/models/sql/test_get_column_values.sql @@ -1,5 +1,5 @@ -{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = []) %} +{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = [], order_by="field", sort_direction="desc") %} {% if target.type == 'snowflake' %} diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 4897f69b..d46197f7 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -18,7 +18,15 @@ Returns: {% endif %} {#-- #} - {%- set order_by = kwargs.get('order_by', 'count(*)') -%} + {%- set order_by = order_by if order_by else 'count(*)' -%} + {%- set order_by = 'max(' ~ order_by ~ ')' if order_by == column else order_by -%} + {%- set sort_direction -%} + {%- if order_by == column -%} + {{ sort_direction or 'asc' }} + {%- else -%} + {{ sort_direction or 'desc' }} + {%- endif -%} + {%- endset -%} {%- set target_relation = adapter.get_relation(database=table.database, schema=table.schema, @@ -46,7 +54,7 @@ Returns: {{ order_by }} as sort_column 
from {{ target_relation }} group by 1 - order by {{ order_by }} {{ sort_direction or "desc" }} + order by {{ order_by }} {{ sort_direction }} {% if max_records is not none %} limit {{ max_records }}