Skip to content

Commit 3dbd5a7

Browse files
authored
Merge pull request #37 from Sage-Bionetworks/SNOW-69-file-association-table
[SNOW-69] Add stack env variable and fileassociation tables
2 parents f07621e + e7b2ab3 commit 3dbd5a7

File tree

7 files changed

+73
-9
lines changed

7 files changed

+73
-9
lines changed

.github/workflows/ci.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWSQL_WAREHOUSE }}
3737
SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION }}
3838
SNOWFLAKE_SYNAPSE_STAGE_URL: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_URL }}
39-
39+
STACK: ${{ vars.STACK }}
4040
steps:
4141
- uses: actions/checkout@v4
4242
- uses: actions/setup-python@v4
@@ -156,6 +156,7 @@ jobs:
156156
SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE: ${{ vars.SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE }}
157157
SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION }}
158158
SNOWFLAKE_SYNAPSE_STAGE_URL: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_URL }}
159+
STACK: ${{ vars.STACK }}
159160
steps:
160161
- uses: actions/checkout@v4
161162
- uses: actions/setup-python@v4

admin/integrations.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ CREATE STORAGE INTEGRATION IF NOT EXISTS synapse_prod_warehouse_s3
77
TYPE = EXTERNAL_STAGE
88
STORAGE_PROVIDER = 'S3'
99
ENABLED = TRUE
10-
STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::325565585839:role/snowflake-accesss-SnowflakeServiceRole-HL66JOP7K4BT'
10+
STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::325565585839:role/snowflake-access-SnowflakeServiceRole-2JSCDRkX8TcW'
1111
STORAGE_ALLOWED_LOCATIONS = ('s3://prod.datawarehouse.sagebase.org', 's3://prod.filehandles.sagebase.org');
1212

1313
-- DESC INTEGRATION synapse_prod_warehouse_s3;

sage/portal_elt.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
1-
from dotenv import dotenv_values
21
import snowflake.connector
32
from snowflake.connector.pandas_tools import write_pandas
43
import synapseclient
54
import pandas as pd
5+
import configparser
6+
import os
67

8+
config = configparser.ConfigParser()
9+
config.read(os.path.expanduser('~/.snowsql/config'))
710

8-
syn = synapseclient.login()
11+
snowflake_config = config['connections']
912

10-
config = dotenv_values("../.env")
13+
syn = synapseclient.login()
1114

15+
# config = dotenv_values("../.env")
16+
print(snowflake_config['username'])
1217
ctx = snowflake.connector.connect(
13-
user=config['user'],
14-
password=config['password'],
15-
account=config['snowflake_account'],
18+
user=snowflake_config['username'],
19+
account=snowflake_config['accountname'],
20+
authenticator=snowflake_config['authenticator'],
1621
database="sage",
1722
schema="portal_raw",
1823
role="SYSADMIN",
19-
warehouse="compute_xsmall"
24+
warehouse="compute_xsmall",
25+
login_timeout=60,
26+
network_timeout=30,
27+
socket_timeout=10
2028
)
2129

2230
cs = ctx.cursor()

synapse_data_warehouse/schemachange-config.yml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ vars:
1212
database_name: {{env_var('SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE')}}
1313
stage_storage_integration: {{env_var('SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION')}}
1414
stage_url: {{env_var('SNOWFLAKE_SYNAPSE_STAGE_URL')}}
15+
stack: {{env_var('STACK')}}
1516
# secrets:
1617
# # not a good example of secrets, just here to demo the secret filtering
1718
# trips_s3_bucket: s3://snowflake-workshop-lab/citibike-trips
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
-- Creates the external stage `synapse_filehandles_stage` in the `synapse_raw`
-- schema (only if it does not already exist) and then refreshes it.
-- `database_name`, `stage_storage_integration` and `stack` are template
-- variables substituted before the script runs; the existing `--noqa`
-- markers on the templated lines are kept unchanged.
1+
USE SCHEMA {{database_name}}.synapse_raw; --noqa: PRS,TMP
2+
CREATE STAGE IF NOT EXISTS synapse_filehandles_stage
3+
STORAGE_INTEGRATION = {{stage_storage_integration}} --noqa: TMP
4+
URL = 's3://{{stack}}.filehandles.sagebase.org/fileHandleAssociations/records/' --noqa: TMP
5+
FILE_FORMAT = (TYPE = PARQUET COMPRESSION = AUTO)
6+
DIRECTORY = (ENABLE = TRUE);
-- The stage is created with a directory table enabled (DIRECTORY above);
-- REFRESH updates that directory table's metadata for the staged files.
-- IF EXISTS makes this a no-op rather than an error when the stage is absent.
7+
ALTER STAGE IF EXISTS synapse_filehandles_stage REFRESH;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
-- Creates the `filehandleassociationsnapshots` table in the `synapse_raw`
-- schema (only if it does not already exist), clustered by `instance`, and
-- then loads it from the `synapse_filehandles_stage` stage.
-- `database_name` is a template variable substituted before the script runs.
1+
USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
-- The warehouse used for the statements below is fixed to COMPUTE_MEDIUM.
2+
USE WAREHOUSE COMPUTE_MEDIUM;
3+
CREATE TABLE IF NOT EXISTS filehandleassociationsnapshots (
4+
associateid INT COMMENT 'The unique identifier of the Synapse object that wraps the file.',
5+
associatetype STRING COMMENT 'The type of the Synapse object that wraps the file.',
6+
filehandleid INT COMMENT 'The unique identifier of the file handle.',
7+
instance STRING COMMENT 'The version of the stack that processed the file association.',
8+
stack STRING COMMENT 'The stack (prod, dev) on which the file handle association processed.',
9+
timestamp TIMESTAMP COMMENT 'The time when the association data was collected.'
10+
)
11+
COMMENT='The table contains file handle association records that are weekly scanned. A FileHandleAssociation record is a FileHandle (identified by its id) along with a Synapse object (identified by its id and type).'
12+
CLUSTER BY (instance);
13+
-- Initial load: each staged record is read as the single column $1 and the
-- six fields are extracted from it by name, in the same order as the table's
-- column definitions above (no explicit target column list is given, so the
-- order of the SELECT list matters).
-- NOTE(review): the extracted fields are not cast explicitly; conversion to
-- INT / STRING / TIMESTAMP presumably relies on COPY's implicit conversion
-- to the target column types - confirm.
14+
copy into
15+
filehandleassociationsnapshots
16+
from (
17+
select
18+
$1:associateid as associateid,
19+
$1:associatetype as associatetype,
20+
$1:filehandleid as filehandleid,
21+
$1:instance as instance,
22+
$1:stack as stack,
23+
$1:timestamp as timestamp
24+
from
25+
@synapse_filehandles_stage --noqa: TMP
26+
);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
-- Registers the task `append_to_filehandleassociationsnapshots_task`, which
-- runs AFTER `refresh_synapse_warehouse_s3_stage_task` and copies records
-- from `synapse_filehandles_stage` into `filehandleassociationsnapshots`.
-- The whole script runs under the ACCOUNTADMIN role; `database_name` is a
-- template variable substituted before the script runs.
1+
use role accountadmin;
2+
use schema {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
-- The predecessor task is suspended before the new child task is attached
-- to it and re-enabled by the last statement of this script.
-- NOTE(review): presumably required because a task graph cannot be modified
-- while its root task is running - confirm against the Snowflake task docs.
3+
alter task refresh_synapse_warehouse_s3_stage_task suspend;
4+
create task if not exists append_to_filehandleassociationsnapshots_task
5+
user_task_managed_initial_warehouse_size = 'SMALL'
6+
AFTER refresh_synapse_warehouse_s3_stage_task
7+
as
-- Task body: each staged record is read as the single column $1 and six
-- fields are extracted from it by name. No explicit target column list is
-- given, so the SELECT order must match the target table's column order.
-- NOTE(review): the predecessor task is named after the warehouse S3 stage,
-- while this COPY reads from synapse_filehandles_stage - confirm that
-- chaining to that task is the intended trigger.
8+
copy into
9+
filehandleassociationsnapshots
10+
from (
11+
select
12+
$1:associateid as associateid,
13+
$1:associatetype as associatetype,
14+
$1:filehandleid as filehandleid,
15+
$1:instance as instance,
16+
$1:stack as stack,
17+
$1:timestamp as timestamp
18+
from
19+
@synapse_filehandles_stage --noqa: TMP
20+
);
-- Re-enables the predecessor task together with the tasks that depend on it,
-- including the one created above.
21+
SELECT SYSTEM$TASK_DEPENDENTS_ENABLE('refresh_synapse_warehouse_s3_stage_task');

0 commit comments

Comments
 (0)