From b4aa6006d79560b0e50edc50cdeb59ba01287f0d Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Mon, 6 May 2024 22:59:37 -0700 Subject: [PATCH 1/6] Add stack var and file association tables --- .github/workflows/ci.yaml | 3 ++- .../schemachange-config.yml | 1 + ...2.16.0__create_file_association_stages.sql | 7 ++++++ ....1__add_initial_file_association_table.sql | 25 +++++++++++++++++++ .../V2.16.2__add_file_association_tasks.sql | 21 ++++++++++++++++ 5 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 synapse_data_warehouse/synapse_raw/V2.16.0__create_file_association_stages.sql create mode 100644 synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql create mode 100644 synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3ddf91d2..1b317533 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -36,7 +36,7 @@ jobs: SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWSQL_WAREHOUSE }} SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION }} SNOWFLAKE_SYNAPSE_STAGE_URL: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_URL }} - + STACK: ${{ vars.STACK }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 @@ -156,6 +156,7 @@ jobs: SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE: ${{ vars.SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE }} SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION }} SNOWFLAKE_SYNAPSE_STAGE_URL: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_URL }} + STACK: ${{ vars.STACK }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 diff --git a/synapse_data_warehouse/schemachange-config.yml b/synapse_data_warehouse/schemachange-config.yml index 00f35e4f..d7a31670 100644 --- a/synapse_data_warehouse/schemachange-config.yml +++ b/synapse_data_warehouse/schemachange-config.yml @@ -12,6 +12,7 @@ vars: database_name: {{env_var('SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE')}} stage_storage_integration: {{env_var('SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION')}} stage_url: {{env_var('SNOWFLAKE_SYNAPSE_STAGE_URL')}} + stack: {{env_var('STACK')}} # secrets: # # not a good example of secrets, just here to demo the secret filtering # trips_s3_bucket: s3://snowflake-workshop-lab/citibike-trips diff --git a/synapse_data_warehouse/synapse_raw/V2.16.0__create_file_association_stages.sql b/synapse_data_warehouse/synapse_raw/V2.16.0__create_file_association_stages.sql new file mode 100644 index 00000000..d25d22c6 --- /dev/null +++ b/synapse_data_warehouse/synapse_raw/V2.16.0__create_file_association_stages.sql @@ -0,0 +1,7 @@ +USE SCHEMA {{database_name}}.synapse_raw; --noqa: PRS,TMP +CREATE STAGE IF NOT EXISTS synapse_filehandles_stage + STORAGE_INTEGRATION = {{stage_storage_integration}} --noqa: TMP + URL = 's3://{{stack}}.filehandles.sagebase.org/fileHandleAssociations/records/' --noqa: TMP + FILE_FORMAT = (TYPE = PARQUET COMPRESSION = AUTO) + DIRECTORY = (ENABLE = TRUE); +ALTER STAGE IF EXISTS synapse_filehandles_stage REFRESH; diff --git a/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql new file mode 100644 index 00000000..4a45d428 --- /dev/null +++ b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql @@ -0,0 +1,25 @@ +USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP +USE WAREHOUSE COMPUTE_MEDIUM; +CREATE TABLE IF NOT EXISTS filehandle_association ( + associateid int, + associatetype string, + filehandleid INT, + instance string, + stack string, + timestamp timestamp +) +CLUSTER BY (instance); + +copy into + filehandle_association +from ( + select + $1:associateid as associateid, + $1:associatetype as associatetype, + $1:filehandleid as filehandleid, + $1:instance as instance, + $1:stack as stack, + $1:timestamp as timestamp + from + @synapse_filehandles_stage --noqa: TMP +); diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql new file mode 100644 index 00000000..6a78709f --- /dev/null +++ b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql @@ -0,0 +1,21 @@ +use role accountadmin; +use schema {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP +alter task refresh_synapse_warehouse_s3_stage_task suspend; +create task if not exists append_to_fileinventory_task + user_task_managed_initial_warehouse_size = 'SMALL' + AFTER refresh_synapse_warehouse_s3_stage_task +as + copy into + filehandle_association + from ( + select + $1:associateid as associateid, + $1:associatetype as associatetype, + $1:filehandleid as filehandleid, + $1:instance as instance, + $1:stack as stack, + $1:timestamp as timestamp + from + @synapse_filehandles_stage --noqa: TMP + ); +SELECT SYSTEM$TASK_DEPENDENTS_ENABLE('refresh_synapse_warehouse_s3_stage_task'); From a1abbd74ae39078c01a2d4e17173b3e07c20e98b Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Tue, 7 May 2024 10:23:18 -0700 Subject: [PATCH 2/6] Add new integration --- admin/integrations.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin/integrations.sql b/admin/integrations.sql index d3870530..0acc69e4 100644 --- a/admin/integrations.sql +++ b/admin/integrations.sql @@ -7,7 +7,7 @@ CREATE STORAGE INTEGRATION IF NOT EXISTS synapse_prod_warehouse_s3 TYPE = EXTERNAL_STAGE STORAGE_PROVIDER = 'S3' ENABLED = TRUE - STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::325565585839:role/snowflake-accesss-SnowflakeServiceRole-HL66JOP7K4BT' + STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::325565585839:role/snowflake-access-SnowflakeServiceRole-2JSCDRkX8TcW' STORAGE_ALLOWED_LOCATIONS = ('s3://prod.datawarehouse.sagebase.org'); -- DESC INTEGRATION synapse_prod_warehouse_s3; From 66013b2726bdf2b6a0330814c6a386629929950c Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 8 May 2024 12:07:03 -0700 Subject: [PATCH 3/6] Patch up parser --- sage/portal_elt.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sage/portal_elt.py b/sage/portal_elt.py index d5345be1..6e427c5f 100644 --- a/sage/portal_elt.py +++ b/sage/portal_elt.py @@ -1,22 +1,30 @@ -from dotenv import dotenv_values import snowflake.connector from snowflake.connector.pandas_tools import write_pandas import synapseclient import pandas as pd +import configparser +import os +config = configparser.ConfigParser() +config.read(os.path.expanduser('~/.snowsql/config')) -syn = synapseclient.login() +snowflake_config = config['connections'] -config = dotenv_values("../.env") +syn = synapseclient.login() +# config = dotenv_values("../.env") +print(snowflake_config['username']) ctx = snowflake.connector.connect( - user=config['user'], - password=config['password'], - account=config['snowflake_account'], + user=snowflake_config['username'], + account=snowflake_config['accountname'], + authenticator=snowflake_config['authenticator'], database="sage", schema="portal_raw", role="SYSADMIN", - warehouse="compute_xsmall" + warehouse="compute_xsmall", + login_timeout=60, + network_timeout=30, + socket_timeout=10 ) cs = ctx.cursor() From 21f50ca21cfe71f9d6c25f00947b54d49969bde3 Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Wed, 8 May 2024 19:01:22 -0700 Subject: [PATCH 4/6] Add comment --- .../V2.16.1__add_initial_file_association_table.sql | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql index 4a45d428..77ed9e90 100644 --- a/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql +++ b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql @@ -1,13 +1,14 @@ USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP USE WAREHOUSE COMPUTE_MEDIUM; CREATE TABLE IF NOT EXISTS filehandle_association ( - associateid int, - associatetype string, - filehandleid INT, - instance string, - stack string, - timestamp timestamp + associateid INT COMMENT 'The unique identifier of the Synapse object that wraps the file.', + associatetype STRING COMMENT 'The type of the Synapse object that wraps the file.', + filehandleid INT COMMENT 'The unique identifier of the file handle.', + instance STRING COMMENT 'The version of the stack that processed the file association.', + stack STRING COMMENT 'The stack (prod, dev) on which the file handle association processed.', + timestamp TIMESTAMP COMMENT 'The time when the association data was collected.' ) +COMMENT='The table contains file handle association records that are weekly scanned. A FileHandleAssociation record is a FileHandle (identified by its id) along with a Synapse object (identified by its id and type).' CLUSTER BY (instance); copy into From 1508b64c2ade59bc1b77a720bc409a54f591ca7a Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Thu, 9 May 2024 09:02:09 -0700 Subject: [PATCH 5/6] Update synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql --- .../synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql index 6a78709f..5327e2da 100644 --- a/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql +++ b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql @@ -1,7 +1,7 @@ use role accountadmin; use schema {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP alter task refresh_synapse_warehouse_s3_stage_task suspend; -create task if not exists append_to_fileinventory_task +create task if not exists append_to_fileassociation_task user_task_managed_initial_warehouse_size = 'SMALL' AFTER refresh_synapse_warehouse_s3_stage_task as From 4f7728479f142816823cb854dd776ecb8ac6245d Mon Sep 17 00:00:00 2001 From: Thomas Yu Date: Thu, 9 May 2024 09:05:09 -0700 Subject: [PATCH 6/6] Update to snapshots --- .../tables/V2.16.1__add_initial_file_association_table.sql | 4 ++-- .../synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql index 77ed9e90..7ce2806b 100644 --- a/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql +++ b/synapse_data_warehouse/synapse_raw/tables/V2.16.1__add_initial_file_association_table.sql @@ -1,6 +1,6 @@ USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP USE WAREHOUSE COMPUTE_MEDIUM; -CREATE TABLE IF NOT EXISTS filehandle_association ( +CREATE TABLE IF NOT EXISTS filehandleassociationsnapshots ( associateid INT COMMENT 'The unique identifier of the Synapse object that wraps the file.', associatetype STRING COMMENT 'The type of the Synapse object that wraps the file.', filehandleid INT COMMENT 'The unique identifier of the file handle.', @@ -12,7 +12,7 @@ COMMENT='The table contains file handle association records that are weekly scan CLUSTER BY (instance); copy into - filehandle_association + filehandleassociationsnapshots from ( select $1:associateid as associateid, diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql index 6a78709f..28d94043 100644 --- a/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql +++ b/synapse_data_warehouse/synapse_raw/tasks/V2.16.2__add_file_association_tasks.sql @@ -1,12 +1,12 @@ use role accountadmin; use schema {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP alter task refresh_synapse_warehouse_s3_stage_task suspend; -create task if not exists append_to_fileinventory_task +create task if not exists append_to_filehandleassociationsnapshots_task user_task_managed_initial_warehouse_size = 'SMALL' AFTER refresh_synapse_warehouse_s3_stage_task as copy into - filehandle_association + filehandleassociationsnapshots from ( select $1:associateid as associateid,