Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions spartan/scripts/network_pause.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,61 @@ if [[ -z "$NAMESPACE" ]]; then
usage
fi

log "Snapshotting $NAMESPACE"
$scripts_dir/manual_snapshot.sh $NAMESPACE
CONFIGMAP_NAME="network-pause-state"

log "Waiting for snapshot upload"
sleep 60 # staging-ignition takes 28s
# Guard against double-pause (would overwrite saved state with zeros)
if kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" &>/dev/null; then
die "Namespace $NAMESPACE is already paused (ConfigMap $CONFIGMAP_NAME exists). Run network_resume.sh first."
fi

# Snapshot if the cronjob exists (not all networks have snapshots enabled)
SNAPSHOT_CRONJOB="$NAMESPACE-snapshot-aztec-snapshots"
if kubectl get cronjob "$SNAPSHOT_CRONJOB" -n "$NAMESPACE" &>/dev/null; then
log "Snapshotting $NAMESPACE"
$scripts_dir/manual_snapshot.sh $NAMESPACE
log "Waiting for snapshot upload"
sleep 60 # staging-ignition takes 28s
else
log "Snapshot cronjob not found ($SNAPSHOT_CRONJOB), skipping snapshot"
fi

# Collect current replica counts before scaling down
log "Collecting current replica counts"

SS_JSON=$(kubectl get statefulset -n "$NAMESPACE" -o json | \
jq '[.items[] | {key: .metadata.name, value: .spec.replicas}] | from_entries')

DEPLOY_JSON=$(kubectl get deployment -n "$NAMESPACE" -o json | \
jq '[.items[] | {key: .metadata.name, value: .spec.replicas}] | from_entries')

CRONJOB_JSON=$(kubectl get cronjob -n "$NAMESPACE" -o json | \
jq '[.items[] | select(.spec.suspend != true) | .metadata.name]')

STATE_JSON=$(jq -n \
--arg paused_at "$(date -Is)" \
--argjson statefulsets "$SS_JSON" \
--argjson deployments "$DEPLOY_JSON" \
--argjson cronjobs "$CRONJOB_JSON" \
'{paused_at: $paused_at, statefulsets: $statefulsets, deployments: $deployments, cronjobs: $cronjobs}')

log "Saving pause state to ConfigMap $CONFIGMAP_NAME"
kubectl create configmap "$CONFIGMAP_NAME" \
-n "$NAMESPACE" \
--from-literal=state="$STATE_JSON"

# Scale everything down except eth-devnet (L1 beacon chain cannot recover from long pauses)
log "Pausing namespace $NAMESPACE"
for item_type in statefulset deployment; do
for item in $(kubectl get $item_type -n $NAMESPACE -o jsonpath='{.items[*].metadata.name}'); do
kubectl scale -n $NAMESPACE $item_type/$item --replicas 0
for item in $(kubectl get "$item_type" -n "$NAMESPACE" -o json | \
jq -r '.items[] | select(.metadata.labels["app.kubernetes.io/name"] != "eth-devnet") | .metadata.name'); do
log " Scaling $item_type/$item to 0"
kubectl scale -n "$NAMESPACE" "$item_type/$item" --replicas 0
done
done

log "Suspending cronjobs"
for item in $(kubectl get cronjob -n $NAMESPACE -o jsonpath='{.items[*].metadata.name}'); do
kubectl -n $NAMESPACE patch cronjobs $item -p '{"spec" : {"suspend" : true }}'
done

log "Namespace $NAMESPACE paused successfully. State saved to ConfigMap $CONFIGMAP_NAME."
76 changes: 76 additions & 0 deletions spartan/scripts/network_resume.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
#
# Resume a network that was paused by network_pause.sh.
#
# Reads the saved pause state from a ConfigMap in the target namespace,
# scales statefulsets and deployments back to their recorded replica counts,
# unsuspends the cronjobs recorded as active, and deletes the ConfigMap.
#
# Requires: kubectl, jq.

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# Logging helpers. Every message carries a severity tag followed by the
# output of `date -Is` and the caller's arguments joined into one string.

# Write an informational message to stdout.
log() {
  printf '%s\n' "[INFO] $(date -Is) - $*"
}

# Write an error message to stderr.
err() {
  printf '%s\n' "[ERROR] $(date -Is) - $*" 1>&2
}

# Report an error via err and terminate the script with exit status 1.
die() {
  err "$*"
  exit 1
}

# Print the command-line help text to stdout, then exit with status 1.
usage() {
  cat <<EOF
Usage: $0 [namespace]

Arguments:
 namespace - Kubernetes namespace (default: from NAMESPACE env var)

Environment variables:
 NAMESPACE - K8s namespace (required if not passed as argument)

EOF
  exit 1
}

# Target namespace: the first positional argument wins, otherwise the
# NAMESPACE environment variable is used.
NAMESPACE="${1:-${NAMESPACE:-}}"

if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
fi

if [[ -z "$NAMESPACE" ]]; then
  usage
fi

CONFIGMAP_NAME="network-pause-state"

# Scale every saved workload of one kind back to its recorded replica count.
# Entries whose saved count is not a positive number are left untouched.
# Globals:   STATE_JSON, NAMESPACE, CONFIGMAP_NAME (read)
# Arguments: $1 - resource kind passed to kubectl (statefulset | deployment)
#            $2 - key of the matching name->replicas object in STATE_JSON
restore_replicas() {
  local kind=$1 state_key=$2
  local targets name replicas

  # Capture the list through a plain assignment: a jq failure inside a
  # `for x in $(...)` word list would be ignored by `set -e`, and the script
  # would carry on to delete the saved state without restoring anything.
  targets=$(jq -r --arg key "$state_key" '
    (.[$key] // {}) as $saved
    | $saved | keys[]
    | select(($saved[.] | type) == "number" and $saved[.] > 0)
    | "\(.) \($saved[.])"
  ' <<<"$STATE_JSON") || die "Failed to read $state_key from ConfigMap $CONFIGMAP_NAME"

  # Feed the loop on fd 3 so commands in the body cannot consume the list
  # through stdin.
  while read -r name replicas <&3; do
    [[ -n "$name" ]] || continue
    log " Scaling $kind/$name to $replicas replicas"
    kubectl scale -n "$NAMESPACE" "$kind/$name" --replicas "$replicas"
  done 3<<<"$targets"
}

# Read saved state
log "Reading pause state from ConfigMap $CONFIGMAP_NAME"
STATE_JSON=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.state}') || \
  die "ConfigMap $CONFIGMAP_NAME not found in namespace $NAMESPACE. Is the network paused?"

# Validate before touching the cluster. `jq -e` is required here: plain
# `jq .` exits 0 on empty input, so a ConfigMap without a `state` key would
# pass the check, nothing would be restored, and the ConfigMap would still be
# deleted at the end.
jq -e '
  type == "object"
  and ((.statefulsets // {}) | type == "object")
  and ((.deployments // {}) | type == "object")
  and ((.cronjobs // []) | type == "array")
' <<<"$STATE_JSON" >/dev/null 2>&1 || die "Invalid JSON in ConfigMap $CONFIGMAP_NAME"

paused_at=$(jq -r '.paused_at // "unknown"' <<<"$STATE_JSON")
log "Network was paused at $paused_at"

# Restore statefulset replicas
log "Restoring statefulsets"
restore_replicas statefulset statefulsets

# Restore deployment replicas
log "Restoring deployments"
restore_replicas deployment deployments

# Unsuspend only cronjobs that were active before pause
log "Unsuspending cronjobs"
active_cronjobs=$(jq -r '(.cronjobs // [])[] | select(type == "string")' <<<"$STATE_JSON") || \
  die "Failed to read cronjobs from ConfigMap $CONFIGMAP_NAME"
while read -r name <&3; do
  [[ -n "$name" ]] || continue
  log " Unsuspending cronjob/$name"
  kubectl -n "$NAMESPACE" patch cronjobs "$name" -p '{"spec" : {"suspend" : false }}'
done 3<<<"$active_cronjobs"

# Clean up. This is reached only when every step above succeeded, so a failed
# resume leaves the saved state in place for a retry.
log "Cleaning up ConfigMap $CONFIGMAP_NAME"
kubectl delete configmap "$CONFIGMAP_NAME" -n "$NAMESPACE"

log "Namespace $NAMESPACE resumed successfully."
Loading