Skip to content

Commit

Permalink
Merge pull request #1664 from edx/musama/amplitude_user_properties_ba…
Browse files Browse the repository at this point in the history
…ckfill

feat: Jenkins job to backfill user properties on Amplitude.
  • Loading branch information
usama101 authored Sep 18, 2023
2 parents f0110cb + fd85f9e commit 76f1c0c
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
60 changes: 60 additions & 0 deletions dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package analytics

import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm


/**
 * Job DSL definition of the "amplitude-user-properties-backfill" Jenkins job.
 *
 * The job checks out the analytics tools repository into the
 * `analytics-tools` directory, binds the `analytics-vault` username/password
 * credential to ANALYTICS_VAULT_ROLE_ID / ANALYTICS_VAULT_SECRET_ID, and runs
 * the shell script `dataeng/resources/amplitude-properties-backfill.sh`.
 */
class AmplitudeUserPropertiesBackfill {
    /**
     * Closure that registers the job.
     *
     * dslFactory: object providing `job(...)` and `readFileFromWorkspace(...)`
     *             (presumably the Job DSL factory -- confirm against the caller).
     * allVars:    map-like configuration object; values are read with `get(...)`.
     */
    public static def job = { dslFactory, allVars ->
        dslFactory.job("amplitude-user-properties-backfill") {
            logRotator common_log_rotator(allVars)
            authorization common_authorization(allVars)

            // Shared SCM parameters first, then the parameters specific to this job.
            parameters secure_scm_parameters(allVars)
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                // Falls back to the literal '$PAGER_NOTIFY' when allVars has no NOTIFY entry.
                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
                stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to use.')
                // The following four parameters default to empty and must be filled in per run.
                stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table name that has data which needs to be updated on Amplitude. It should have format like database.schema.table.')
                stringParam('COLUMNS_TO_UPDATE', '', 'Columns that you want to update. Separate multiple columns with commas.')
                stringParam('RESPONSE_TABLE', '', 'Output table which will store the updated data along with response from API endpoint.')
                stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name. e.g: set or setOnce.')
            }

            // Values copied from allVars into the build environment.
            environmentVariables {
                env('KEY_PATH', allVars.get('KEY_PATH'))
                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
                env('USER', allVars.get('USER'))
                env('ACCOUNT', allVars.get('ACCOUNT'))
                env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH'))
                env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION'))
            }

            // Compose the closure returned by secure_scm with an additional git
            // checkout of the analytics tools repo.
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        url('$ANALYTICS_TOOLS_URL')
                        branch('$ANALYTICS_TOOLS_BRANCH')
                        // NOTE(review): '1' looks like a Jenkins credentials ID -- confirm.
                        credentials('1')
                    }
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }

            wrappers {
                timestamps()
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault')
                }
            }

            publishers common_publishers(allVars)

            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh'))
            }
        }
    }
}
2 changes: 2 additions & 0 deletions dataeng/jobs/createJobsNew.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggers
import static analytics.RetirementJobs.job as RetirementJobsJob
import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob
import static analytics.SnowflakeExpirePasswords.job as SnowflakeExpirePasswordsJob
import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob
import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob
import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob
Expand Down Expand Up @@ -50,6 +51,7 @@ def taskMap = [
RETIREMENT_JOBS_JOB: RetirementJobsJob,
SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob,
SNOWFLAKE_EXPIRE_PASSWORDS_JOB: SnowflakeExpirePasswordsJob,
AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob,
SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob,
PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob,
Expand Down
40 changes: 40 additions & 0 deletions dataeng/resources/amplitude-properties-backfill.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
# Runs amplitude_user_properties_update.py from the analytics-tools checkout.
#
# Reads from the environment: WORKSPACE, PYTHON_VENV_VERSION,
# ANALYTICS_VAULT_ROLE_ID, ANALYTICS_VAULT_SECRET_ID,
# AMPLITUDE_VAULT_KV_VERSION, AMPLITUDE_VAULT_KV_PATH, KEY_PATH,
# PASSPHRASE_PATH, USER, ACCOUNT, AMPLITUDE_DATA_SOURCE_TABLE,
# COLUMNS_TO_UPDATE, RESPONSE_TABLE, AMPLITUDE_OPERATION_NAME.
set -ex

# Creating Python virtual env
PYTHON_VENV="python_venv"
virtualenv --python="${PYTHON_VENV_VERSION}" --clear "${PYTHON_VENV}"
source "${PYTHON_VENV}/bin/activate"

# Setup
cd "${WORKSPACE}/analytics-tools/snowflake"
make requirements

# Do not print commands from here on since they may contain secrets.
set +x

# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
#
# Assign first and export afterwards: `export VAR=$(cmd)` returns the exit
# status of `export`, which would hide a failed login from `set -e`.
VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
)
export VAULT_TOKEN

API_KEY=$(
    vault kv get \
        -version="${AMPLITUDE_VAULT_KV_VERSION}" \
        -field=API_KEY \
        "${AMPLITUDE_VAULT_KV_PATH}"
)

# Every expansion is quoted so that values containing spaces or glob
# characters (e.g. a column list written as "a, b") reach the script as a
# single argument.
python amplitude_user_properties_update.py \
    --key_path "${KEY_PATH}" \
    --passphrase_path "${PASSPHRASE_PATH}" \
    --automation_user "${USER}" \
    --account "${ACCOUNT}" \
    --amplitude_data_source_table "${AMPLITUDE_DATA_SOURCE_TABLE}" \
    --columns_to_update "${COLUMNS_TO_UPDATE}" \
    --response_table "${RESPONSE_TABLE}" \
    --amplitude_operation_name "${AMPLITUDE_OPERATION_NAME}" \
    --amplitude_api_key "${API_KEY}"

0 comments on commit 76f1c0c

Please sign in to comment.