From fd85f9e6ea8a3f6848d86c03ece11ece3ae2d0cf Mon Sep 17 00:00:00 2001
From: Muhammad Usama
Date: Mon, 28 Aug 2023 18:03:42 +0500
Subject: [PATCH] feat: Jenkins job to backfill user properties on Amplitude.

---
 .../AmplitudeUserPropertiesBackfill.groovy    | 60 +++++++++++++++++++
 dataeng/jobs/createJobsNew.groovy             |  2 +
 .../amplitude-properties-backfill.sh          | 40 +++++++++++++
 3 files changed, 102 insertions(+)
 create mode 100644 dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
 create mode 100644 dataeng/resources/amplitude-properties-backfill.sh

diff --git a/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy b/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
new file mode 100644
index 000000000..421ce0536
--- /dev/null
+++ b/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
@@ -0,0 +1,60 @@
+package analytics
+
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
+import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
+import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
+import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
+
+
+class AmplitudeUserPropertiesBackfill {
+    public static def job = { dslFactory, allVars ->
+        dslFactory.job("amplitude-user-properties-backfill") {
+            logRotator common_log_rotator(allVars)
+            authorization common_authorization(allVars)
+            parameters secure_scm_parameters(allVars)
+            parameters {
+                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
+                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of the analytics tools repo to use.')
+                stringParam('NOTIFY', allVars.get('NOTIFY', '$PAGER_NOTIFY'), 'Space-separated list of emails to send notifications to.')
+                stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to use.')
+                stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table holding the data that needs to be updated on Amplitude, in the format database.schema.table.')
+                stringParam('COLUMNS_TO_UPDATE', '', 'Columns to update. Separate multiple columns with commas.')
+                stringParam('RESPONSE_TABLE', '', 'Output table that will store the updated data along with the response from the API endpoint.')
+                stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name, e.g. set or setOnce.')
+            }
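Reviewer note: Jenkins exposes each stringParam above to the job's shell step as an environment variable of the same name. As a minimal sketch of how the downstream script could fail fast when a required parameter is left blank (the guard itself is an illustrative addition, not part of this patch):

    # Illustrative guard; each variable is populated by the stringParam of the same name.
    for required in AMPLITUDE_DATA_SOURCE_TABLE COLUMNS_TO_UPDATE RESPONSE_TABLE AMPLITUDE_OPERATION_NAME; do
        # ${!required} is bash indirect expansion: the value of the variable named by $required.
        if [[ -z "${!required}" ]]; then
            echo "Missing required job parameter: ${required}" >&2
            exit 1
        fi
    done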
+            environmentVariables {
+                env('KEY_PATH', allVars.get('KEY_PATH'))
+                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
+                env('USER', allVars.get('USER'))
+                env('ACCOUNT', allVars.get('ACCOUNT'))
+                env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH'))
+                env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION'))
+            }
+            multiscm secure_scm(allVars) << {
+                git {
+                    remote {
+                        url('$ANALYTICS_TOOLS_URL')
+                        branch('$ANALYTICS_TOOLS_BRANCH')
+                        credentials('1')
+                    }
+                    extensions {
+                        relativeTargetDirectory('analytics-tools')
+                        pruneBranches()
+                        cleanAfterCheckout()
+                    }
+                }
+            }
+            wrappers {
+                timestamps()
+                credentialsBinding {
+                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault')
+                }
+            }
+            publishers common_publishers(allVars)
+            steps {
+                shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh'))
+            }
+        }
+    }
+}
diff --git a/dataeng/jobs/createJobsNew.groovy b/dataeng/jobs/createJobsNew.groovy
index 578e4fd96..e6b25a068 100644
--- a/dataeng/jobs/createJobsNew.groovy
+++ b/dataeng/jobs/createJobsNew.groovy
@@ -8,6 +8,7 @@ import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggers
 import static analytics.RetirementJobs.job as RetirementJobsJob
 import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob
 import static analytics.SnowflakeExpirePasswords.job as SnowflakeExpirePasswordsJob
+import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob
 import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
 import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob
 import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob
@@ -50,6 +51,7 @@ def taskMap = [
     RETIREMENT_JOBS_JOB: RetirementJobsJob,
     SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob,
     SNOWFLAKE_EXPIRE_PASSWORDS_JOB: SnowflakeExpirePasswordsJob,
+    AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob,
     SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
     SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob,
     PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob,
diff --git a/dataeng/resources/amplitude-properties-backfill.sh b/dataeng/resources/amplitude-properties-backfill.sh
new file mode 100644
index 000000000..beb8aadef
--- /dev/null
+++ b/dataeng/resources/amplitude-properties-backfill.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+set -ex
+
+# Create the Python virtual environment.
+PYTHON_VENV="python_venv"
+virtualenv --python="${PYTHON_VENV_VERSION}" --clear "${PYTHON_VENV}"
+source "${PYTHON_VENV}/bin/activate"
+
+# Setup
+cd "${WORKSPACE}/analytics-tools/snowflake"
+make requirements
+
+# Do not print commands from here on, since they may contain secrets.
+set +x
+
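Reviewer note: the script's next section exchanges the AppRole credentials bound by credentialsBinding above for a Vault client token. For reference, a minimal standalone sketch of the same login flow; the Vault address is hypothetical, and the token lookup is only a sanity check, not part of the job:

    # Hypothetical address for a manual test; the job gets its Vault target from the environment.
    export VAULT_ADDR="https://vault.example.com:8200"

    # Exchange role_id/secret_id for a client token (the same call the script makes below).
    VAULT_TOKEN=$(vault write -field=token auth/approle/login \
        role_id="${ANALYTICS_VAULT_ROLE_ID}" \
        secret_id="${ANALYTICS_VAULT_SECRET_ID}")
    export VAULT_TOKEN

    # Confirm the token is valid before running anything that depends on it.
    vault token lookup > /dev/null && echo "Vault login OK"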
+# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the
+# VAULT_TOKEN variable, which is implicitly used by subsequent vault commands within this script.
+# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
+export VAULT_TOKEN=$(vault write -field=token auth/approle/login \
+    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
+    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
+)
+
+API_KEY=$(
+    vault kv get \
+        -version="${AMPLITUDE_VAULT_KV_VERSION}" \
+        -field=API_KEY \
+        "${AMPLITUDE_VAULT_KV_PATH}"
+)
+
+python amplitude_user_properties_update.py \
+    --key_path "${KEY_PATH}" \
+    --passphrase_path "${PASSPHRASE_PATH}" \
+    --automation_user "${USER}" \
+    --account "${ACCOUNT}" \
+    --amplitude_data_source_table "${AMPLITUDE_DATA_SOURCE_TABLE}" \
+    --columns_to_update "${COLUMNS_TO_UPDATE}" \
+    --response_table "${RESPONSE_TABLE}" \
+    --amplitude_operation_name "${AMPLITUDE_OPERATION_NAME}" \
+    --amplitude_api_key "${API_KEY}"
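Reviewer note: for a manual dry run outside Jenkins, the same entry point can be invoked with explicit values. Every value below is hypothetical and only mirrors the flags the script passes above; real runs receive these from the job parameters and Vault:

    # Hypothetical values for a manual dry run.
    python amplitude_user_properties_update.py \
        --key_path /path/to/rsa_key.p8 \
        --passphrase_path /path/to/passphrase.txt \
        --automation_user automation_user \
        --account example_account \
        --amplitude_data_source_table analytics.amplitude.user_properties \
        --columns_to_update user_language,user_tier \
        --response_table analytics.amplitude.backfill_responses \
        --amplitude_operation_name set \
        --amplitude_api_key "${API_KEY}"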