-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
380 additions
and
25 deletions.
There are no files selected for viewing
33 changes: 33 additions & 0 deletions
33
.github/workflows/provision-replay-verify-archive-disks.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Provisions the archive storage used by replay-verify by calling the reusable
# storage-provision workflow once for testnet and once for mainnet.
#
# Triggers:
#   - workflow_dispatch: run manually from the Actions tab ("Run workflow").
#     NETWORK selects what to provision: "testnet" or "mainnet" runs only that
#     network's job; any other value runs both.
#   - pull_request: only when this file or the reusable workflow it calls changes.
#     Both jobs run.
#   - schedule: see the cron entry below. Both jobs run.

name: "provision-replay-verify-archive-disks"
on:
  # Allow triggering manually
  workflow_dispatch:
    inputs:
      NETWORK:
        required: true
        type: string
        description: The network to provision storage for.
  pull_request:
    paths:
      - ".github/workflows/provision-replay-verify-archive-disks.yaml"
      - ".github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml"
  schedule:
    # The main branch cadence. This runs every Sun,Tues,Thurs at 22:00
    # (GitHub evaluates cron schedules in UTC).
    - cron: "0 22 * * 0,2,4"

jobs:
  replay-testnet:
    # Skipped only when a manual run explicitly asked for the other network;
    # pull_request and schedule runs always execute this job.
    if: ${{ github.event_name != 'workflow_dispatch' || inputs.NETWORK != 'mainnet' }}
    uses: ./.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
    secrets: inherit
    with:
      NETWORK: testnet

  replay-mainnet:
    # Skipped only when a manual run explicitly asked for the other network;
    # pull_request and schedule runs always execute this job.
    if: ${{ github.event_name != 'workflow_dispatch' || inputs.NETWORK != 'testnet' }}
    uses: ./.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
    secrets: inherit
    with:
      NETWORK: mainnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
.github/workflows/workflow-run-replay-verify-archive-storage-provision.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Reusable workflow that provisions replay-verify archive storage for a single
# network by running testsuite/replay-verify/archive_disk_utils.py.
# NOTE(review): this display name is identical to the one used by
# workflow-run-replay-verify-on-archive.yaml; consider giving each workflow a
# distinct name so they can be told apart in the Actions UI.
name: "*run replay-verify reusable workflow"

on:
  # This allows the workflow to be triggered from another workflow
  workflow_call:
    inputs:
      NETWORK:
        required: true
        type: string
        description: The network to provision storage for.
  # This allows the workflow to be triggered manually from the Github UI or CLI
  workflow_dispatch:
    inputs:
      NETWORK:
        description: The network to provision storage for.
        type: string
        required: true

jobs:
  provision:
    runs-on: runs-on,cpu=4,ram=16,family=m7a+m7i-flex,image=aptos-ubuntu-x64,run-id=${{ github.run_id }},spot=co
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Checks out main regardless of the ref that triggered the run.
          ref: main

      - name: Setup Python
        uses: ./.github/actions/python-setup
        with:
          pyproject_directory: testsuite/replay-verify
          # Quoted on purpose: a bare 3.10 is a YAML float and resolves to 3.1.
          python_version: "3.10"

      - uses: aptos-labs/aptos-core/.github/actions/docker-setup@main
        id: docker-setup
        with:
          GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          GCP_SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }}
          EXPORT_GCP_PROJECT_VARIABLES: "false"
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DOCKER_ARTIFACT_REPO: ${{ secrets.AWS_DOCKER_ARTIFACT_REPO }}
          GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}
          GCP_AUTH_DURATION: 3600

      # Makes the token produced by docker-setup available to later steps as an
      # environment variable.
      - name: "Export GCloud auth token"
        id: gcloud-auth
        run: echo "CLOUDSDK_AUTH_ACCESS_TOKEN=${{ steps.docker-setup.outputs.CLOUDSDK_AUTH_ACCESS_TOKEN }}" >> $GITHUB_ENV
        shell: bash

      - name: "Setup GCloud project"
        shell: bash
        run: gcloud config set project aptos-devinfra-0

      - name: "Provision storage"
        # The input is handed to the script through the environment instead of
        # being expanded into the command line, so its value cannot be
        # interpreted as shell syntax.
        env:
          NETWORK: ${{ inputs.NETWORK }}
        run: poetry run python archive_disk_utils.py "$NETWORK"
        working-directory: ./testsuite/replay-verify
|
||
|
||
|
||
|
253 changes: 253 additions & 0 deletions
253
.github/workflows/workflow-run-replay-verify-on-archive.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
# Reusable workflow that checks out GIT_SHA, builds (or restores from cache) the
# aptos-debugger binary, generates replay-verify job ranges, and runs
# replay-verify for each generated job in a matrix.
name: "*run replay-verify reusable workflow"

on:
  # This allows the workflow to be triggered from another workflow
  workflow_call:
    inputs:
      GIT_SHA:
        required: true
        type: string
        description: The git SHA1 to test.
      # replay-verify config
      START_VERSION:
        required: false
        type: string
        description: The history start to use for the backup. If not specified, it will use the default history start.
      END_VERSION:
        required: false
        type: string
        description: The end version to use for the backup. If not specified, it will use the latest version.
      RANGES_TO_SKIP:
        required: false
        type: string
        description: The optional list of transaction ranges to skip.
      RUNS_ON:
        description: "The runner to use for the job."
        type: string
        # Optional so that the default below can take effect: a required
        # workflow_call input must always be supplied by the caller, which
        # would leave the default unused.
        required: false
        default: "medium-perf-local-ssd"
  # This allows the workflow to be triggered manually from the Github UI or CLI
  # NOTE: no TIMEOUT_MINUTES input is declared here; the replay-verify job falls
  # back to a 180 minute timeout.
  workflow_dispatch:
    inputs:
      GIT_SHA:
        required: true
        type: string
        description: The git SHA1 to test.
      # replay-verify config
      START_VERSION:
        required: false
        type: string
        description: The history start to use for the backup. If not specified, it will use the default history start.
      END_VERSION:
        required: false
        type: string
        description: The end version to use for the backup. If not specified, it will use the latest version.
      RANGES_TO_SKIP:
        required: false
        type: string
        description: The optional list of transaction ranges to skip.
      RUNS_ON:
        description: "The runner to use for the job."
        type: string
        required: true
        default: "high-perf-docker-with-local-ssd"
# NOTE(review): the jobs below read inputs.BUCKET, inputs.SUB_DIR,
# inputs.BACKUP_CONFIG_TEMPLATE_PATH, inputs.MAX_VERSIONS_PER_RANGE and
# inputs.TIMEOUT_MINUTES, none of which are declared under `on:` in this file,
# and the declared END_VERSION input is never read. Declare or remove them
# before relying on this workflow.
jobs:
  # Produces the aptos-debugger binary (built or restored from cache), the
  # metadata cache, and jobs.json; exposes the list of job indices as `job_ids`.
  prepare:
    runs-on: ${{ inputs.RUNS_ON }}
    outputs:
      job_ids: ${{ steps.gen-jobs.outputs.job_ids }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.GIT_SHA }}

      - name: Load cached aptos-debugger binary
        id: cache-aptos-debugger-binary
        uses: actions/cache@v4
        with:
          # copy the binary to the root of the repo and cache it there, because rust-setup calls a cache-rust action
          # which cleans up the target directory in its post action
          path: |
            aptos-debugger
          key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}

      - name: Prepare for build if not cached
        if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
        uses: aptos-labs/aptos-core/.github/actions/rust-setup@main
        with:
          # GIT_CREDENTIALS is not a declared input of this workflow; read it
          # from secrets (a calling workflow must pass it, e.g. `secrets: inherit`).
          GIT_CREDENTIALS: ${{ secrets.GIT_CREDENTIALS }}

      - name: Build and strip aptos-debugger binary if not cached
        if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
        shell: bash
        run: |
          cargo build --release -p aptos-debugger
          strip -s target/release/aptos-debugger
          cp target/release/aptos-debugger .

      - name: Install GCloud SDK
        uses: "google-github-actions/setup-gcloud@v2"
        with:
          version: ">= 418.0.0"
          install_components: "kubectl,gke-gcloud-auth-plugin"

      - name: get timestamp to use in cache key
        id: get-timestamp
        run: echo "ts=$(date +%s)" >> "$GITHUB_OUTPUT"

      - name: Load cached backup storage metadata cache dir (and save back afterwards)
        uses: actions/cache@v4
        with:
          path: metadata_cache
          # The timestamp suffix makes every run save a fresh entry, while
          # restore-keys lets it start from an earlier entry with the same prefix.
          key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ steps.get-timestamp.outputs.ts }}
          restore-keys: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-

      - name: Generate job ranges
        id: gen-jobs
        shell: bash
        env:
          BUCKET: ${{ inputs.BUCKET }}
          SUB_DIR: ${{ inputs.SUB_DIR }}
          # Free-form inputs are passed through the environment rather than
          # expanded into the script text.
          START_VERSION: ${{ inputs.START_VERSION }}
          RANGES_TO_SKIP: ${{ inputs.RANGES_TO_SKIP }}
        run: |
          # START_VERSION is optional; only pass --start-version when it is set.
          start_version_args=()
          if [[ -n "${START_VERSION}" ]]; then
            start_version_args=(--start-version "${START_VERSION}")
          fi

          # The final option must not end with a line continuation, otherwise
          # the jq command below is swallowed into this command line.
          ./aptos-debugger aptos-db gen-replay-verify-jobs \
            --metadata-cache-dir ./metadata_cache \
            --command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
            "${start_version_args[@]}" \
            --ranges-to-skip "${RANGES_TO_SKIP}" \
            --max-versions-per-range ${{ inputs.MAX_VERSIONS_PER_RANGE }} \
            --max-ranges-per-job 16 \
            --output-json-file jobs.json

          # job_ids.json is the list [0, 1, ..., N-1] for the N entries in jobs.json.
          jq -c 'length as $N | [range(0; $N)]' jobs.json > job_ids.json

          cat job_ids.json
          jq . jobs.json

          echo "job_ids=$(cat job_ids.json)" >> "$GITHUB_OUTPUT"

      - name: Cache backup storage config and job definition
        uses: actions/cache/save@v4
        with:
          path: |
            ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
            jobs.json
          key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}

  # Runs one matrix entry per job index produced by `prepare`; each entry replays
  # all ranges of its job in parallel.
  replay-verify:
    needs: prepare
    timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }}
    runs-on: ${{ inputs.RUNS_ON }}
    strategy:
      fail-fast: false
      matrix:
        job_id: ${{ fromJson(needs.prepare.outputs.job_ids) }}
    steps:
      - name: Load cached aptos-debugger binary
        uses: actions/cache/restore@v4
        with:
          path: |
            aptos-debugger
          key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
          fail-on-cache-miss: true

      - name: Load cached backup storage metadata cache dir
        uses: actions/cache/restore@v4
        with:
          path: metadata_cache
          key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-
          fail-on-cache-miss: true

      - name: Load cached backup storage config and job definitions
        uses: actions/cache/restore@v4
        with:
          path: |
            ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
            jobs.json
          key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}
          fail-on-cache-miss: true

      - name: Install GCloud SDK
        uses: "google-github-actions/setup-gcloud@v2"
        with:
          version: ">= 418.0.0"
          install_components: "kubectl,gke-gcloud-auth-plugin"

      - name: Run replay-verify in parallel
        env:
          BUCKET: ${{ inputs.BUCKET }}
          SUB_DIR: ${{ inputs.SUB_DIR }}
        shell: bash
        run: |
          set -o nounset -o errexit -o pipefail

          # replay IDX ID BEGIN END DESC
          # Replays versions BEGIN..END into its own db_IDX directory using a
          # private copy of the metadata cache. Makes up to 7 attempts, sleeping
          # 10 * attempt seconds before each retry. Returns as soon as an
          # attempt exits with 0 or 2; returns 1 if every attempt ends with
          # another code.
          replay() {
            idx=$1
            id=$2
            begin=$3
            end=$4
            desc=$5

            echo ---------
            echo Job start. $id: $desc
            echo ---------

            MC=metadata_cache_$idx
            cp -r metadata_cache $MC
            DB=db_$idx

            for try in {0..6}
            do
              if [ $try -gt 0 ]; then
                SLEEP=$((10 * $try))
                echo "sleeping for $SLEEP seconds before retry #$try" >&2
                sleep $SLEEP
              fi

              res=0

              ./aptos-debugger aptos-db replay-verify \
                --metadata-cache-dir $MC \
                --command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
                --start-version $begin \
                --end-version $end \
                --lazy-quit \
                --enable-storage-sharding \
                --target-db-dir $DB \
                --concurrent-downloads 8 \
                --replay-concurrency-level 4 \
                || res=$?

              if [[ $res == 0 || $res == 2 ]]
              then
                return $res
              fi
            done
            return 1
          }

          # Start one background replay per line emitted by jq for this matrix
          # job, prefixing its output with the partition index.
          pids=()
          idx=0
          while read id begin end desc; do
            replay $idx $id $begin $end "$desc" 2>&1 | sed "s/^/[partition $idx]: /" &
            pids[$idx]=$!
            idx=$((idx+1))
          done < <(jq '.[${{ matrix.job_id }}][]' jobs.json)

          # Wait for every partition; the step exits with the status of the last
          # partition that returned non-zero, or 0 if all succeeded.
          res=0
          for idx in `seq 0 $((idx-1))`
          do
            range_res=0
            wait ${pids[$idx]} || range_res=$?
            echo partition $idx returned $range_res
            if [[ $range_res != 0 ]]
            then
              res=$range_res
            fi
          done

          echo All partitions done, returning $res
          exit $res
Oops, something went wrong.