Skip to content

Commit

Permalink
Merge branch 'main' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
Jesus89 committed Oct 25, 2023
2 parents 93412d3 + 7cf0c8a commit 1f6552b
Show file tree
Hide file tree
Showing 46 changed files with 805 additions and 22 deletions.
1 change: 1 addition & 0 deletions .github/workflows/bigquery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ jobs:
runs-on: ubuntu-20.04
timeout-minutes: 20
strategy:
fail-fast: false
matrix:
include:
- region: us
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/publish-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,16 @@ jobs:
prerelease: false
token: ${{ secrets.CARTOFANTE_GITHUB_TOKEN }}
body: ${{ env.RELEASE_BODY }}
- name: Create PR from stable into main branch
- name: Create PR to main branch
uses: repo-sync/pull-request@v2
id: pull_request
with:
source_branch: stable
source_branch: ${{ github.event.pull_request.head.ref }}
destination_branch: main
pr_title: Merge stable into main branch
pr_title: Merge release into main branch
pr_body: |
This PR merges the stable branch back into main.
This happens to ensure that the updates that happend on the stable branch, i.e. CHANGELOG updates are also present on the main branch.
This PR merges the release branch back into main.
This happens to ensure that the updates that happend on the stable branch.
- name: Merge PR if possible
continue-on-error: true
env:
Expand Down
4 changes: 2 additions & 2 deletions clouds/bigquery/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ lint-common: venv3 $(NODE_MODULES_DEV)
PATH="$(NODE_MODULES_DEV)/.bin/:$(PATH)" \
markdownlint -f '*.md' --ignore node_modules --disable MD013 MD024 MD033 MD036 MD040 MD041 MD051 MD045 --
echo "- Lint Python files"
$(VENV3_BIN)/brunette $(COMMON_DIR) --line-length=88 --single-quotes --quiet
$(VENV3_BIN)/flake8 $(COMMON_DIR) --max-line-length=88 --enable-extensions Q0 --ignore=D100,D103,D104,E203
$(VENV3_BIN)/brunette $(COMMON_DIR) --exclude node_modules --line-length=88 --single-quotes --quiet
$(VENV3_BIN)/flake8 $(COMMON_DIR) --exclude node_modules --max-line-length=88 --enable-extensions Q0 --ignore=D100,D103,D104,E203

build:
rm -rf $(BUILD_DIR)
Expand Down
2 changes: 1 addition & 1 deletion clouds/bigquery/common/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ templater = jinja
# Comma separated list of rules to check, default to all
rules = all
# Comma separated list of rules to exclude, or None
exclude_rules = L016,L025,L031,L032,L029,L022,L036,L003,L064,L061,L027,L017,L049
exclude_rules = L016,L025,L031,L032,L029,L022,L036,L003,L064,L061,L027,L017,L049,L044
# The depth to recursively parse to (0 for unlimited)
recurse = 0
# Below controls SQLFluff output, see max_line_length for SQL output
Expand Down
2 changes: 1 addition & 1 deletion clouds/bigquery/common/build_modules.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ functionsFilter.forEach(f => {
if (!nodeps) {
functions.forEach(mainFunction => {
functions.forEach(depFunction => {
if (mainFunction.name != depFunction.name) {
if (mainFunction.name != depFunction.name && depFunction.name !== 'SETUP') {
const depFunctionMatches = [];
depFunctionMatches.push(...depFunction.content.replace(/(\r\n|\n|\r)/gm,' ').matchAll(new RegExp('(?<=(?<!TEMP )FUNCTION)(.*?)(?=AS |RETURNS)','g')));
depFunctionMatches.push(...depFunction.content.replace(/(\r\n|\n|\r)/gm,' ').matchAll(new RegExp('(?<=PROCEDURE)(.*?)(?=BEGIN)','g')));
Expand Down
2 changes: 1 addition & 1 deletion clouds/bigquery/common/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"@rollup/plugin-replace": "^2.4.2",
"cli-progress": "^3.11.2",
"eslint": "^7.25.0",
"jest": "^26.6.3",
"jest": "^29.0.0",
"markdownlint-cli": "^0.32.2",
"rollup": "^2.47.0",
"rollup-plugin-bundle-size": "^1.0.3",
Expand Down
2 changes: 1 addition & 1 deletion clouds/bigquery/common/run-script.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const bar = new cliProgress.SingleBar(options, cliProgress.Presets.shades_classi
const client = new BigQuery({ projectId: `${BQ_PROJECT}`, timeout: 600000 });

async function runQueries (queries) {
const query_options = { 'timeoutMs' : 120000 };
const query_options = { 'timeoutMs' : 600000 };
const n = queries.length;
bar.start(n, 0);
for (let i = 0; i < n; i++) {
Expand Down
3 changes: 3 additions & 0 deletions clouds/bigquery/modules/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ endif
ifdef BQ_PERMISSIONS_GROUP
BQ_PERMISSIONS_TARGET_DATASET=$(BQ_DEPLOY_DATASET) $(COMMON_DIR)/set_module_permissions_group.sh
endif
$(MAKE) extra-deploy

extra-deploy::

dataset-create:
$(BQ) show $(BQ_DEPLOY_DATASET) 2>/dev/null 1>/dev/null || \
Expand Down
2 changes: 1 addition & 1 deletion clouds/bigquery/modules/doc/h3/H3_POLYFILL_MODE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ H3_POLYFILL_MODE(geog, resolution, mode)

**Description**

Returns an array of quadbin cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains.
Returns an array of H3 cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains.

* `geog`: `GEOGRAPHY` representing the shape to cover.
* `resolution`: `INT64` level of detail. The value must be between 0 and 15 ([H3 resolution table](https://h3geo.org/docs/core-library/restable)).
Expand Down
58 changes: 58 additions & 0 deletions clouds/bigquery/modules/doc/h3/H3_POLYFILL_TABLE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
## H3_POLYFILL_TABLE (BETA)

```sql:signature
H3_POLYFILL_TABLE(input_query, resolution, mode, output_table)
```

**Description**

Returns a table with the H3 cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. All the attributes except the geography will be included in the output table, clustered by the h3 column.

* `input_query`: `STRING` input data to polyfill. It must contain a column `geom` with the shape to cover. Additionally, other columns can be included.
* `resolution`: `INT64` level of detail. The value must be between 0 and 15 ([H3 resolution table](https://h3geo.org/docs/core-library/restable)).
* `mode`: `STRING`
* `center` returns the indexes of the H3 cells which centers intersect the input geography (polygon). The resulting H3 set does not fully cover the input geography, however, this is **significantly faster** that the other modes. This mode is not compatible with points or lines. Equivalent to [`H3_POLYFILL`](h3#h3_polyfill).
* `intersects` returns the indexes of the H3 cells that intersect the input geography. The resulting H3 set will completely cover the input geography (point, line, polygon).
* `contains` returns the indexes of the H3 cells that are entirely contained inside the input geography (polygon). This mode is not compatible with points or lines.
* `output_table`: `STRING` name of the output table to store the results of the polyfill.

Mode `center`:

![](h3_polyfill_mode_center.png)

Mode `intersects`:

![](h3_polyfill_mode_intersects.png)

Mode `contains`:

![](h3_polyfill_mode_contains.png)

**Output**

The results are stored in the table named `<output_table>`, which contains the following columns:

* `h3`: `STRING` the geometry of the considered point.
* The rest of columns included in `input_query` except `geom`.

**Examples**

```sql
CALL carto.H3_POLYFILL_TABLE(
"SELECT ST_GEOGFROMTEXT('POLYGON ((-3.71219873428345 40.413365349070865, -3.7144088745117 40.40965661286395, -3.70659828186035 40.409525904775634, -3.71219873428345 40.413365349070865))') AS geom",
9, 'intersects',
'<project>.<dataset>.<output_table>'
);
-- The table `<project>.<dataset>.<output_table>` will be created
-- with column: h3
```

```sql
CALL carto.H3_POLYFILL_TABLE(
'SELECT geom, name, value FROM `<project>.<dataset>.<table>`',
9, 'center',
'<project>.<dataset>.<output_table>'
);
-- The table `<project>.<dataset>.<output_table>` will be created
-- with columns: h3, name, value
```
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
58 changes: 58 additions & 0 deletions clouds/bigquery/modules/doc/quadbin/QUADBIN_POLYFILL_TABLE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
## QUADBIN_POLYFILL_TABLE (BETA)

```sql:signature
QUADBIN_POLYFILL_TABLE(input_query, resolution, mode, output_table)
```

**Description**

Returns a table with the quadbin cell indexes contained in the given geography at a given level of detail. Containment is determined by the mode: center, intersects, contains. All the attributes except the geography will be included in the output table, clustered by the quadbin column.

* `input_query`: `STRING` input data to polyfill. It must contain a column `geom` with the shape to cover. Additionally, other columns can be included.
* `resolution`: `INT64` level of detail. The value must be between 0 and 26.
* `mode`: `STRING`
* `center` returns the indexes of the quadbin cells which centers intersect the input geography (polygon). The resulting quadbin set does not fully cover the input geography, however, this is **significantly faster** that the other modes. This mode is not compatible with points or lines. Equivalent to [`QUADBIN_POLYFILL`](quadbin#quadbin_polyfill).
* `intersects` returns the indexes of the quadbin cells that intersect the input geography. The resulting quadbin set will completely cover the input geography (point, line, polygon).
* `contains` returns the indexes of the quadbin cells that are entirely contained inside the input geography (polygon). This mode is not compatible with points or lines.
* `output_table`: `STRING` name of the output table to store the results of the polyfill.

Mode `center`:

![](quadbin_polyfill_mode_center.png)

Mode `intersects`:

![](quadbin_polyfill_mode_intersects.png)

Mode `contains`:

![](quadbin_polyfill_mode_contains.png)

**Output**

The results are stored in the table named `<output_table>`, which contains the following columns:

* `quadbin`: `INT64` the geometry of the considered point.
* The rest of columns included in `input_query` except `geom`.

**Examples**

```sql
CALL carto.QUADBIN_POLYFILL_TABLE(
"SELECT ST_GEOGFROMTEXT('POLYGON ((-3.71219873428345 40.413365349070865, -3.7144088745117 40.40965661286395, -3.70659828186035 40.409525904775634, -3.71219873428345 40.413365349070865))') AS geom",
12, 'intersects',
'<project>.<dataset>.<output_table>'
);
-- The table `<project>.<dataset>.<output_table>` will be created
-- with column: quadbin
```

```sql
CALL carto.QUADBIN_POLYFILL_TABLE(
'SELECT geom, name, value FROM `<project>.<dataset>.<table>`',
12, 'center',
'<project>.<dataset>.<output_table>'
);
-- The table `<project>.<dataset>.<output_table>` will be created
-- with columns: quadbin, name, value
```
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 59 additions & 0 deletions clouds/bigquery/modules/sql/h3/H3_POLYFILL_TABLE.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
----------------------------
-- Copyright (C) 2023 CARTO
----------------------------

CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__H3_POLYFILL_QUERY`
(
input_query STRING,
resolution INT64,
mode STRING,
output_table STRING
)
RETURNS STRING
DETERMINISTIC
LANGUAGE js
AS """
if (!['center', 'intersects', 'contains'].includes(mode)) {
throw Error('Invalid mode, should be center, intersects, or contains.')
}
if (resolution < 0 || resolution > 15) {
throw Error('Invalid resolution, should be between 0 and 15.')
}
output_table = output_table.replace(/`/g, '')
const containmentFunction = (mode === 'contains') ? 'ST_CONTAINS' : 'ST_INTERSECTS'
const cellFunction = (mode === 'center') ? '@@BQ_DATASET@@.H3_CENTER' : '@@BQ_DATASET@@.H3_BOUNDARY'
return 'CREATE TABLE `' + output_table + '` CLUSTER BY (h3) AS\\n' +
'WITH __input AS (' + input_query + '),\\n' +
'__cells AS (SELECT h3, i.* FROM __input AS i,\\n' +
'UNNEST(`@@BQ_DATASET@@.__H3_POLYFILL_INIT`(geom,`@@BQ_DATASET@@.__H3_POLYFILL_INIT_Z`(geom,' + resolution + '))) AS parent,\\n' +
'UNNEST(`@@BQ_DATASET@@.H3_TOCHILDREN`(parent,' + resolution + ')) AS h3)\\n' +
'SELECT * EXCEPT (geom) FROM __cells\\n' +
'WHERE ' + containmentFunction + '(geom, `' + cellFunction + '`(h3));'
""";

CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.H3_POLYFILL_TABLE`
(
input_query STRING,
resolution INT64,
mode STRING,
output_table STRING
)
BEGIN
DECLARE polyfill_query STRING;

-- Check if the destination tileset already exists
CALL `@@BQ_DATASET@@.__CHECK_TABLE`(output_table);

SET polyfill_query = `@@BQ_DATASET@@.__H3_POLYFILL_QUERY`(
input_query,
resolution,
mode,
output_table
);

EXECUTE IMMEDIATE polyfill_query;
END;
59 changes: 59 additions & 0 deletions clouds/bigquery/modules/sql/quadbin/QUADBIN_POLYFILL_TABLE.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
----------------------------
-- Copyright (C) 2023 CARTO
----------------------------

CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__QUADBIN_POLYFILL_QUERY`
(
input_query STRING,
resolution INT64,
mode STRING,
output_table STRING
)
RETURNS STRING
DETERMINISTIC
LANGUAGE js
AS """
if (!['center', 'intersects', 'contains'].includes(mode)) {
throw Error('Invalid mode, should be center, intersects, or contains.')
}
if (resolution < 0 || resolution > 26) {
throw Error('Invalid resolution, should be between 0 and 26.')
}
output_table = output_table.replace(/`/g, '')
const containmentFunction = (mode === 'contains') ? 'ST_CONTAINS' : 'ST_INTERSECTS'
const cellFunction = (mode === 'center') ? '@@BQ_DATASET@@.QUADBIN_CENTER' : '@@BQ_DATASET@@.QUADBIN_BOUNDARY'
return 'CREATE TABLE `' + output_table + '` CLUSTER BY (quadbin) AS\\n' +
'WITH __input AS (' + input_query + '),\\n' +
'__cells AS (SELECT quadbin, i.* FROM __input AS i,\\n' +
'UNNEST(`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT`(geom,`@@BQ_DATASET@@.__QUADBIN_POLYFILL_INIT_Z`(geom,' + resolution + '))) AS parent,\\n' +
'UNNEST(`@@BQ_DATASET@@.QUADBIN_TOCHILDREN`(parent,' + resolution + ')) AS quadbin)\\n' +
'SELECT * EXCEPT (geom) FROM __cells\\n' +
'WHERE ' + containmentFunction + '(geom, `' + cellFunction + '`(quadbin));'
""";

CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.QUADBIN_POLYFILL_TABLE`
(
input_query STRING,
resolution INT64,
mode STRING,
output_table STRING
)
BEGIN
DECLARE polyfill_query STRING;

-- Check if the destination tileset already exists
CALL `@@BQ_DATASET@@.__CHECK_TABLE`(output_table);

SET polyfill_query = `@@BQ_DATASET@@.__QUADBIN_POLYFILL_QUERY`(
input_query,
resolution,
mode,
output_table
);

EXECUTE IMMEDIATE polyfill_query;
END;
33 changes: 33 additions & 0 deletions clouds/bigquery/modules/sql/utils/__CHECK_TABLE.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---------------------------------
-- Copyright (C) 2020-2021 CARTO
---------------------------------

CREATE OR REPLACE PROCEDURE `@@BQ_DATASET@@.__CHECK_TABLE`
(destination_table STRING)
BEGIN
DECLARE destination_parts DEFAULT (SELECT `@@BQ_DATASET@@.__TABLENAME_SPLIT`(destination_table));
DECLARE tables_metadata STRING;
DECLARE table_name STRING;
DECLARE num_tables INT64;

IF destination_parts IS NULL OR destination_parts.table IS NULL OR destination_parts.dataset IS NULL THEN
SELECT ERROR("The output table does not have a correct format, i.e. [projectID].dataset.tablename. Please, use a different output table name and try again.");
END IF;

SET table_name = destination_parts.table;
SET tables_metadata = `@@BQ_DATASET@@.__TABLENAME_JOIN`((destination_parts.project, destination_parts.dataset, '__TABLES__'));

EXECUTE IMMEDIATE FORMAT(
'''
SELECT COUNT(size_bytes)
FROM %s
WHERE table_id='%s'
''',
tables_metadata,
table_name
) INTO num_tables;

IF num_tables > 0 THEN
SELECT ERROR("The output table to store the tileset already exists. Please, use a different output table name and try again.");
END IF;
END;
14 changes: 14 additions & 0 deletions clouds/bigquery/modules/sql/utils/__TABLENAME_JOIN.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
----------------------------
-- Copyright (C) 2021 CARTO
----------------------------

CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__TABLENAME_JOIN`
(split_name STRUCT<project STRING, dataset STRING, table STRING>)
RETURNS STRING
AS (
IF(
split_name.project IS NULL,
FORMAT('`%s`.`%s`', split_name.dataset, split_name.table),
FORMAT('`%s`.`%s`.`%s`', split_name.project, split_name.dataset, split_name.table)
)
);
16 changes: 16 additions & 0 deletions clouds/bigquery/modules/sql/utils/__TABLENAME_SPLIT.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
----------------------------
-- Copyright (C) 2021 CARTO
----------------------------

CREATE OR REPLACE FUNCTION `@@BQ_DATASET@@.__TABLENAME_SPLIT`
(qualified_name STRING)
RETURNS STRUCT<project STRING, dataset STRING, table STRING>
AS ((
WITH unquoted AS (SELECT REPLACE(qualified_name, "`", "") AS name)

SELECT AS STRUCT
REGEXP_EXTRACT(name, r"^(.+)\..+\..+$") AS project,
COALESCE(REGEXP_EXTRACT(name, r"^.+\.(.+)\..+$"), REGEXP_EXTRACT(name, r"^(.+)\..+$")) AS dataset,
REGEXP_EXTRACT(name, r"^.+\.(.+)$") AS table
FROM unquoted
));
Loading

0 comments on commit 1f6552b

Please sign in to comment.