Skip to content

Commit 9e2fff0

Browse files
committed
Add recalculate_checksum command
In case you clone a database, you would want to recaclulate checksums for repeatable scripts as they already exist but checksum is wrong as database name has changed.
1 parent 69600d8 commit 9e2fff0

File tree

2 files changed

+141
-27
lines changed

2 files changed

+141
-27
lines changed

schemachange/cli.py

+140-26
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
from jinja2.loaders import BaseLoader
2020
from pandas import DataFrame
2121

22-
#region Global Variables
22+
#region Global Variables
2323
# metadata
24-
_schemachange_version = '3.5.3'
24+
_schemachange_version = '3.5.4'
2525
_config_file_name = 'schemachange-config.yml'
2626
_metadata_database_name = 'METADATA'
2727
_metadata_schema_name = 'SCHEMACHANGE'
@@ -48,7 +48,7 @@
4848
+ "{snowflake_role}\nUsing default warehouse {snowflake_warehouse}\nUsing default " \
4949
+ "database {snowflake_database}"
5050
_log_ch_use = "Using change history table {database_name}.{schema_name}.{table_name} " \
51-
+ "(last altered {last_altered})"
51+
+ "(last altered {last_altered})"
5252
_log_ch_create = "Created change history table {database_name}.{schema_name}.{table_name}"
5353
_err_ch_missing = "Unable to find change history table {database_name}.{schema_name}.{table_name}"
5454
_log_ch_max_version = "Max applied change script version: {max_published_version_display}"
@@ -66,7 +66,7 @@
6666
+ "please use a different name"
6767
_err_invalid_folder = "Invalid {folder_type} folder: {path}"
6868
_err_dup_scripts = "The script name {script_name} exists more than once (first_instance " \
69-
+ "{first_path}, second instance {script_full_path})"
69+
+ "{first_path}, second instance {script_full_path})"
7070
_err_dup_scripts_version = "The script version {script_version} exists more than once " \
7171
+ "(second instance {script_full_path})"
7272
_err_dup_undo_scripts_version = "The undo version {script_version} exists more than once " \
@@ -271,7 +271,7 @@ def authenticate(self):
271271
snowflake_password = None
272272
if os.getenv("SNOWFLAKE_PASSWORD") is not None and os.getenv("SNOWFLAKE_PASSWORD"):
273273
snowflake_password = os.getenv("SNOWFLAKE_PASSWORD")
274-
274+
275275
# Check legacy/deprecated env variable
276276
if os.getenv("SNOWSQL_PWD") is not None and os.getenv("SNOWSQL_PWD"):
277277
if snowflake_password:
@@ -312,20 +312,20 @@ def authenticate(self):
312312

313313
self.conArgs['private_key'] = pkb
314314
self.conArgs['authenticator'] = 'snowflake'
315-
316-
elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'oauth' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
315+
316+
elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'oauth' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
317317
oauth_token = self.get_oauth_token()
318-
318+
319319
if self.verbose:
320320
print( _log_auth_type % 'Oauth Access Token')
321321
self.conArgs['token'] = oauth_token
322322
self.conArgs['authenticator'] = 'oauth'
323-
323+
324324
elif os.getenv("SNOWFLAKE_AUTHENTICATOR") == 'externalbrowser' and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
325325
self.conArgs['authenticator'] = 'externalbrowser'
326326
if self.verbose:
327327
print(_log_auth_type % 'External Browser')
328-
328+
329329
elif os.getenv("SNOWFLAKE_AUTHENTICATOR").lower()[:8]=='https://' \
330330
and os.getenv("SNOWFLAKE_AUTHENTICATOR"):
331331
okta = os.getenv("SNOWFLAKE_AUTHENTICATOR")
@@ -468,7 +468,7 @@ def record_change_script(self, script, script_content, change_history_table, exe
468468
# Compose and execute the insert statement to the log file
469469
query = self._q_ch_log.format(**frmt_args)
470470
self.execute_snowflake_query(query)
471-
471+
472472

473473
def deploy_command(config):
474474
req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
@@ -613,6 +613,98 @@ def undo_command(config):
613613

614614
print(_log_undo_set_complete.format(scripts_applied=scripts_applied))
615615

616+
def recalculate_checksum(config):
617+
"""
618+
Recalculate checksum for repeatable migrations
619+
620+
Useful when cloning a database and want to just recalculate the checksums as you already know that database is in the
621+
latest state.
622+
623+
624+
"""
625+
req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
626+
validate_auth_config(config, req_args)
627+
628+
# Log some additional details
629+
if config['dry_run']:
630+
print("Running in dry-run mode")
631+
print(_log_config_details.format(**config))
632+
633+
#connect to snowflake and maintain connection
634+
session = SnowflakeSchemachangeSession(config)
635+
636+
scripts_skipped = 0
637+
scripts_applied = 0
638+
639+
# Deal with the change history table (create if specified)
640+
change_history_table = get_change_history_table_details(config['change_history_table'])
641+
change_history_metadata = session.fetch_change_history_metadata(change_history_table)
642+
if change_history_metadata:
643+
print(_log_ch_use.format(last_altered=change_history_metadata['last_altered'], **change_history_table))
644+
elif config['create_change_history_table']:
645+
# Create the change history table (and containing objects) if it don't exist.
646+
if not config['dry_run']:
647+
session.create_change_history_table_if_missing(change_history_table)
648+
print(_log_ch_create.format(**change_history_table))
649+
else:
650+
raise ValueError(_err_ch_missing.format(**change_history_table))
651+
652+
# Find the max published version
653+
max_published_version = ''
654+
655+
change_history = None
656+
r_scripts_checksum = None
657+
if (config['dry_run'] and change_history_metadata) or not config['dry_run']:
658+
change_history = session.fetch_change_history(change_history_table)
659+
r_scripts_checksum = session.fetch_r_scripts_checksum(change_history_table)
660+
661+
if change_history:
662+
max_published_version = change_history[0]
663+
max_published_version_display = max_published_version
664+
if max_published_version_display == '':
665+
max_published_version_display = 'None'
666+
print(_log_ch_max_version.format(max_published_version_display=max_published_version_display))
667+
668+
# Find all scripts in the root folder (recursively) and sort them correctly
669+
all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose'])
670+
all_script_names = list(all_scripts.keys())
671+
# Sort scripts such that versioned scripts get applied first and then the repeatable ones.
672+
all_script_names_sorted = sorted_alphanumeric([script for script in all_script_names if script[0] == 'R'])
673+
674+
# Loop through each script in order and apply any required changes
675+
for script_name in all_script_names_sorted:
676+
script = all_scripts[script_name]
677+
678+
# Always process with jinja engine
679+
jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], modules_folder = config['modules_folder'])
680+
content = jinja_processor.render(jinja_processor.relpath(script['script_full_path']), config['vars'], config['verbose'])
681+
682+
# Apply only R scripts where the checksum changed compared to the last execution of snowchange
683+
if script_name[0] == 'R':
684+
# Compute the checksum for the script
685+
checksum_current = hashlib.sha224(content.encode('utf-8')).hexdigest()
686+
687+
# check if R file was already executed
688+
if (r_scripts_checksum is not None) and script_name in list(r_scripts_checksum['script_name']):
689+
checksum_last = list(r_scripts_checksum.loc[r_scripts_checksum['script_name'] == script_name, 'checksum'])[0]
690+
else:
691+
checksum_last = ''
692+
693+
# check if there is a change of the checksum in the script
694+
if checksum_current == checksum_last:
695+
if config['verbose']:
696+
print(_log_skip_r.format(**script))
697+
scripts_skipped += 1
698+
continue
699+
700+
print(_log_apply.format(**script))
701+
702+
if not config['dry_run']:
703+
session.record_change_script(script, content, change_history_table, 0)
704+
scripts_applied += 1
705+
706+
print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped))
707+
616708
def render_command(config, script_path):
617709
"""
618710
Renders the provided script.
@@ -622,7 +714,7 @@ def render_command(config, script_path):
622714
# Validate the script file path
623715
script_path = os.path.abspath(script_path)
624716
if not os.path.isfile(script_path):
625-
raise ValueError(_err_invalid_folder.format(folder_type='script_path', path=script_path))
717+
raise ValueError(_err_invalid_folder.format(folder_type='script_path', path=script_path))
626718
# Always process with jinja engine
627719
jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], \
628720
modules_folder = config['modules_folder'])
@@ -713,10 +805,10 @@ def get_schemachange_config(config_file_path, root_folder, modules_folder, snowf
713805

714806
# Validate folder paths
715807
if 'root_folder' in config:
716-
config['root_folder'] = os.path.abspath(config['root_folder'])
808+
config['root_folder'] = os.path.abspath(config['root_folder'])
717809
if not os.path.isdir(config['root_folder']):
718810
raise ValueError(_err_invalid_folder.format(folder_type='root', path=config['root_folder']))
719-
811+
720812
if config['modules_folder']:
721813
config['modules_folder'] = os.path.abspath(config['modules_folder'])
722814
if not os.path.isdir(config['modules_folder']):
@@ -810,7 +902,7 @@ def get_all_scripts_recursively(root_directory, verbose):
810902
# Throw an error if the same version exists more than once
811903
if script_type == 'V':
812904
if script['script_version'] in all_versions:
813-
raise ValueError(_err_dup_scripts_version.format(**script))
905+
raise ValueError(_err_dup_scripts_version.format(**script))
814906
all_versions.append(script['script_version'])
815907

816908
if script_type == 'U':
@@ -828,22 +920,22 @@ def get_all_scripts_recursively(root_directory, verbose):
828920
def get_change_history_table_details(change_history_table):
829921
# Start with the global defaults
830922
details = dict()
831-
details['database_name'] = _metadata_database_name
832-
details['schema_name'] = _metadata_schema_name
833-
details['table_name'] = _metadata_table_name
923+
details['database_name'] = _metadata_database_name
924+
details['schema_name'] = _metadata_schema_name
925+
details['table_name'] = _metadata_table_name
834926

835927
# Then override the defaults if requested. The name could be in one, two or three part notation.
836928
if change_history_table is not None:
837929
table_name_parts = change_history_table.strip().split('.')
838-
if len(table_name_parts) == 1:
839-
details['table_name'] = table_name_parts[0]
930+
if len(table_name_parts) == 1:
931+
details['table_name'] = table_name_parts[0]
840932
elif len(table_name_parts) == 2:
841-
details['table_name'] = table_name_parts[1]
842-
details['schema_name'] = table_name_parts[0]
933+
details['table_name'] = table_name_parts[1]
934+
details['schema_name'] = table_name_parts[0]
843935
elif len(table_name_parts) == 3:
844-
details['table_name'] = table_name_parts[2]
845-
details['schema_name'] = table_name_parts[1]
846-
details['database_name'] = table_name_parts[0]
936+
details['table_name'] = table_name_parts[2]
937+
details['schema_name'] = table_name_parts[1]
938+
details['database_name'] = table_name_parts[0]
847939
else:
848940
raise ValueError(_err_invalid_cht % change_history_table)
849941
#if the object name does not include '"' raise to upper case on return
@@ -894,6 +986,24 @@ def main(argv=sys.argv):
894986
parser = argparse.ArgumentParser(prog = 'schemachange', description = 'Apply schema changes to a Snowflake account. Full readme at https://github.com/Snowflake-Labs/schemachange', formatter_class = argparse.RawTextHelpFormatter)
895987
subcommands = parser.add_subparsers(dest='subcommand')
896988

989+
parser_undo = subcommands.add_parser("recalculate_checksum")
990+
parser_undo.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False)
991+
parser_undo.add_argument('-s', '--step', type = int, default = 1, help = 'Amount of versioned migrations to be undone in the reverse of their applied order', required = False)
992+
parser_undo.add_argument('-f', '--root-folder', type = str, help = 'The root folder for the database change scripts', required = False)
993+
parser_undo.add_argument('-m', '--modules-folder', type = str, help = 'The modules folder for jinja macros and templates to be used across multiple scripts', required = False)
994+
parser_undo.add_argument('-a', '--snowflake-account', type = str, help = 'The name of the snowflake account (e.g. xy12345.east-us-2.azure)', required = False)
995+
parser_undo.add_argument('-u', '--snowflake-user', type = str, help = 'The name of the snowflake user', required = False)
996+
parser_undo.add_argument('-r', '--snowflake-role', type = str, help = 'The name of the default role to use', required = False)
997+
parser_undo.add_argument('-w', '--snowflake-warehouse', type = str, help = 'The name of the default warehouse to use. Can be overridden in the change scripts.', required = False)
998+
parser_undo.add_argument('-d', '--snowflake-database', type = str, help = 'The name of the default database to use. Can be overridden in the change scripts.', required = False)
999+
parser_undo.add_argument('-c', '--change-history-table', type = str, help = 'Used to override the default name of the change history table (the default is METADATA.SCHEMACHANGE.CHANGE_HISTORY)', required = False)
1000+
parser_undo.add_argument('--vars', type = json.loads, help = 'Define values for the variables to replaced in change scripts, given in JSON format (e.g. {"variable1": "value1", "variable2": "value2"})', required = False)
1001+
parser_undo.add_argument('-ac', '--autocommit', action='store_true', help = 'Enable autocommit feature for DML commands (the default is False)', required = False)
1002+
parser_undo.add_argument('-v','--verbose', action='store_true', help = 'Display verbose debugging details during execution (the default is False)', required = False)
1003+
parser_undo.add_argument('--dry-run', action='store_true', help = 'Run schemachange in dry run mode (the default is False)', required = False)
1004+
parser_undo.add_argument('--query-tag', type = str, help = 'The string to add to the Snowflake QUERY_TAG session value for each query executed', required = False)
1005+
parser_undo.add_argument('--oauth-config', type = json.loads, help = 'Define values for the variables to Make Oauth Token requests (e.g. {"token-provider-url": "https//...", "token-request-payload": {"client_id": "GUID_xyz",...},... })', required = False)
1006+
8971007
parser_undo = subcommands.add_parser("undo")
8981008
parser_undo.add_argument('--config-folder', type = str, default = '.', help = 'The folder to look in for the schemachange-config.yml file (the default is the current working directory)', required = False)
8991009
parser_undo.add_argument('-s', '--step', type = int, default = 1, help = 'Amount of versioned migrations to be undone in the reverse of their applied order', required = False)
@@ -942,7 +1052,7 @@ def main(argv=sys.argv):
9421052
# The original parameters did not support subcommands. Check if a subcommand has been supplied
9431053
# if not default to deploy to match original behaviour.
9441054
args = argv[1:]
945-
if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO"]):
1055+
if len(args) == 0 or not any(subcommand in args[0].upper() for subcommand in ["DEPLOY", "RENDER", "UNDO", "RECALCULATE_CHECKSUM"]):
9461056
args = ["deploy"] + args
9471057

9481058
args = parser.parse_args(args)
@@ -964,6 +1074,8 @@ def main(argv=sys.argv):
9641074
"create_change_history_table":None,"autocommit":None,"dry_run":None,"query_tag":None,"oauth_config":None,"step":None }
9651075
elif args.subcommand == 'undo':
9661076
renderoveride = {"create_change_history_table":None}
1077+
elif args.subcommand == 'recalculate_checksum':
1078+
renderoveride = {"create_change_history_table":None}
9671079
elif args.subcommand == 'deploy':
9681080
renderoveride = {"step":None}
9691081

@@ -997,6 +1109,8 @@ def main(argv=sys.argv):
9971109
render_command(config, args.script)
9981110
elif args.subcommand == 'undo':
9991111
undo_command(config)
1112+
elif args.subcommand == 'recalculate_checksum':
1113+
recalculate_checksum(config)
10001114
else:
10011115
deploy_command(config)
10021116

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = schemachange
3-
version = 3.5.3
3+
version = 3.5.4
44
author = jamesweakley/jeremiahhansen
55
description = A Database Change Management tool for Snowflake
66
long_description = file: README.md

0 commit comments

Comments
 (0)