From fa37790b9177e67c18c22d51708150797aa94411 Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Fri, 10 Apr 2026 18:29:35 +0000 Subject: [PATCH 1/7] feat(pipeline): add ACSA deployment scripts and configuration files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - create deploy-acsa.sh for deploying Azure Container Storage - add configuration files for ACSA PVC, subvolume, and ingest policy - update azure-services.txt with new service names πŸ”§ - Generated by Copilot --- .cspell/azure-services.txt | 7 + data-pipeline/arc/acsa-edge-subvolume.yaml | 14 + data-pipeline/arc/acsa-ingest-policy.yaml | 13 + data-pipeline/arc/acsa-pvc.yaml | 13 + data-pipeline/setup/defaults.conf | 33 ++ data-pipeline/setup/deploy-acsa.sh | 394 +++++++++++++++++++++ 6 files changed, 474 insertions(+) create mode 100644 data-pipeline/arc/acsa-edge-subvolume.yaml create mode 100644 data-pipeline/arc/acsa-ingest-policy.yaml create mode 100644 data-pipeline/arc/acsa-pvc.yaml create mode 100644 data-pipeline/setup/defaults.conf create mode 100755 data-pipeline/setup/deploy-acsa.sh diff --git a/.cspell/azure-services.txt b/.cspell/azure-services.txt index 42e9749b..debcd7ec 100644 --- a/.cspell/azure-services.txt +++ b/.cspell/azure-services.txt @@ -31,11 +31,15 @@ arcbox arcgis arck arcsight +arccontainerstorage armttk azacsnap azapi azcmagent azcopy +certmanagement +connectedk8s +containerstorage azdo azmk azmon @@ -68,5 +72,8 @@ servicebus sharepoint snet southeastasia +storageaccountendpoint wasbs westus +edgevolume +edgevolumes diff --git a/data-pipeline/arc/acsa-edge-subvolume.yaml b/data-pipeline/arc/acsa-edge-subvolume.yaml new file mode 100644 index 00000000..d6d618b7 --- /dev/null +++ b/data-pipeline/arc/acsa-edge-subvolume.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: arccontainerstorage.azure.net/v1 +kind: EdgeSubvolume +metadata: + name: ${SUBVOLUME_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + edgevolume: ${ACSA_PVC_NAME} + path: 
${SUBVOLUME_PATH} + auth: + authType: MANAGED_IDENTITY + storageaccountendpoint: "https://${STORAGE_ACCOUNT_NAME}.blob.core.windows.net/" + container: ${BLOB_CONTAINER_NAME} + ingestPolicy: ${ACSA_INGEST_POLICY} diff --git a/data-pipeline/arc/acsa-ingest-policy.yaml b/data-pipeline/arc/acsa-ingest-policy.yaml new file mode 100644 index 00000000..a655be30 --- /dev/null +++ b/data-pipeline/arc/acsa-ingest-policy.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: arccontainerstorage.azure.net/v1 +kind: EdgeIngestPolicy +metadata: + name: ${ACSA_INGEST_POLICY_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + ingest: + order: ${ACSA_INGEST_ORDER} + minDelaySec: ${ACSA_INGEST_MIN_DELAY_SEC} + eviction: + order: ${ACSA_EVICTION_ORDER} + minDelaySec: ${ACSA_EVICTION_MIN_DELAY_SEC} diff --git a/data-pipeline/arc/acsa-pvc.yaml b/data-pipeline/arc/acsa-pvc.yaml new file mode 100644 index 00000000..01b525cf --- /dev/null +++ b/data-pipeline/arc/acsa-pvc.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ${ACSA_PVC_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: ${ACSA_PVC_SIZE} + storageClassName: ${ACSA_STORAGE_CLASS} diff --git a/data-pipeline/setup/defaults.conf b/data-pipeline/setup/defaults.conf new file mode 100644 index 00000000..54a88807 --- /dev/null +++ b/data-pipeline/setup/defaults.conf @@ -0,0 +1,33 @@ +# Default configuration for data-pipeline setup deployment scripts +# Override via command-line arguments or environment variables + +# Default Terraform Directory (relative to data-pipeline/setup) +DEFAULT_TF_DIR="${DEFAULT_TF_DIR:-../../infrastructure/terraform}" + +# Arc Namespace Configuration +EDGE_NAMESPACE="${EDGE_NAMESPACE:-data-pipeline}" + +# ACSA Extension Configuration +ACSA_EXTENSION_NAME="${ACSA_EXTENSION_NAME:-azure-arc-containerstorage}" +ACSA_EXTENSION_VERSION="${ACSA_EXTENSION_VERSION:-2.6.0}" +ACSA_RELEASE_TRAIN="${ACSA_RELEASE_TRAIN:-stable}" 
+ACSA_DISK_STORAGE_CLASS="${ACSA_DISK_STORAGE_CLASS:-default,local-path}" + +# ACSA Volume Configuration +ACSA_STORAGE_CLASS="${ACSA_STORAGE_CLASS:-cloud-backed-sc}" +ACSA_PVC_NAME="${ACSA_PVC_NAME:-recording-data}" +ACSA_PVC_SIZE="${ACSA_PVC_SIZE:-50Gi}" + +# Blob Sync Configuration +BLOB_CONTAINER_NAME="${BLOB_CONTAINER_NAME:-datasets}" +SUBVOLUME_NAME="${SUBVOLUME_NAME:-recordings}" +SUBVOLUME_PATH="${SUBVOLUME_PATH:-recordings}" +ACSA_INGEST_POLICY="${ACSA_INGEST_POLICY:-edgeingestpolicy-default}" + +# Optional custom EdgeIngestPolicy configuration +ACSA_SHOULD_APPLY_INGEST_POLICY="${ACSA_SHOULD_APPLY_INGEST_POLICY:-false}" +ACSA_INGEST_POLICY_NAME="${ACSA_INGEST_POLICY_NAME:-ros2-bag-ingest-policy}" +ACSA_INGEST_ORDER="${ACSA_INGEST_ORDER:-oldest-first}" +ACSA_INGEST_MIN_DELAY_SEC="${ACSA_INGEST_MIN_DELAY_SEC:-30}" +ACSA_EVICTION_ORDER="${ACSA_EVICTION_ORDER:-unordered}" +ACSA_EVICTION_MIN_DELAY_SEC="${ACSA_EVICTION_MIN_DELAY_SEC:-600}" \ No newline at end of file diff --git a/data-pipeline/setup/deploy-acsa.sh b/data-pipeline/setup/deploy-acsa.sh new file mode 100755 index 00000000..e5cd409f --- /dev/null +++ b/data-pipeline/setup/deploy-acsa.sh @@ -0,0 +1,394 @@ +#!/usr/bin/env bash +# Deploy Azure Container Storage for Arc (ACSA) resources for ROS2 recording sync +set -o errexit -o nounset + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || (cd "$SCRIPT_DIR/../.." && pwd))" +# shellcheck source=../../scripts/lib/common.sh +# shellcheck disable=SC1091 +source "$REPO_ROOT/scripts/lib/common.sh" +# shellcheck source=defaults.conf +# shellcheck disable=SC1091 +source "$SCRIPT_DIR/defaults.conf" + +ARC_DIR="$SCRIPT_DIR/../arc" + +show_help() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Install cert-manager and ACSA on an Arc-connected edge cluster, assign Blob role, +create a Blob container, and apply ACSA PVC/subvolume manifests. 
+ +OPTIONS: + -h, --help Show this help message + -t, --tf-dir DIR Terraform directory (default: $DEFAULT_TF_DIR) + --cluster-name NAME Arc-connected cluster name (or ARC_CLUSTER_NAME) + --cluster-resource-group NAME Resource group of Arc cluster (or ARC_RESOURCE_GROUP) + --storage-account NAME Storage account name override + --storage-resource-group NAME Storage account resource group (or STORAGE_ACCOUNT_RESOURCE_GROUP) + --connectivity-mode MODE direct|proxy (default: direct) + --proxy-port PORT Arc proxy port (default: 47011) + --config-preview Print configuration and exit + +EXAMPLES: + $(basename "$0") --cluster-name my-edge --cluster-resource-group rg-edge + $(basename "$0") --connectivity-mode proxy --cluster-name my-edge --cluster-resource-group rg-edge +EOF +} + +wait_for_extension_state() { + local extension_name="${1:?extension name required}" + local desired_state="${2:?desired state required}" + local max_attempts="${3:-30}" + local sleep_seconds="${4:-10}" + local provisioning_state="" + + for ((attempt = 1; attempt <= max_attempts; attempt++)); do + provisioning_state=$(az k8s-extension show \ + --name "$extension_name" \ + --cluster-name "$cluster_name" \ + --resource-group "$cluster_resource_group" \ + --cluster-type connectedClusters \ + --query provisioningState -o tsv 2>/dev/null || true) + + if [[ "$provisioning_state" == "$desired_state" ]]; then + info "Extension $extension_name reached state: $desired_state" + return 0 + fi + + if [[ "$provisioning_state" == "Failed" ]]; then + fatal "Extension $extension_name provisioning failed" + fi + + info "Waiting for extension $extension_name ($attempt/$max_attempts): ${provisioning_state:-pending}" + sleep "$sleep_seconds" + done + + fatal "Timed out waiting for extension $extension_name to reach state $desired_state" +} + +start_arc_proxy() { + local kubeconfig_file="${1:?kubeconfig file required}" + local log_file="${2:?log file required}" + + info "Starting Arc proxy for cluster $cluster_name on 
port $proxy_port..." + az connectedk8s proxy \ + --name "$cluster_name" \ + --resource-group "$cluster_resource_group" \ + --file "$kubeconfig_file" \ + --port "$proxy_port" \ + >"$log_file" 2>&1 & + + proxy_pid=$! + export KUBECONFIG="$kubeconfig_file" + + for ((attempt = 1; attempt <= 20; attempt++)); do + if kubectl cluster-info >/dev/null 2>&1; then + info "Arc proxy connectivity established" + return 0 + fi + sleep 2 + done + + fatal "Failed to establish kubectl connectivity through Arc proxy (log: $log_file)" +} + +cleanup_proxy() { + if [[ -n "${proxy_pid:-}" ]] && kill -0 "$proxy_pid" >/dev/null 2>&1; then + kill "$proxy_pid" >/dev/null 2>&1 || true + fi + + if [[ -n "${proxy_kubeconfig:-}" && -f "$proxy_kubeconfig" ]]; then + rm -f "$proxy_kubeconfig" + fi + + if [[ -n "${proxy_log_file:-}" && -f "$proxy_log_file" ]]; then + rm -f "$proxy_log_file" + fi + + if [[ -n "${render_dir:-}" && -d "$render_dir" ]]; then + rm -rf "$render_dir" + fi + + if [[ -n "${role_assignment_error_file:-}" && -f "$role_assignment_error_file" ]]; then + rm -f "$role_assignment_error_file" + fi +} + +install_or_update_extension() { + local extension_name="${1:?extension name required}" + local extension_type="${2:?extension type required}" + local extension_version="${3:?extension version required}" + local release_train="${4:?release train required}" + shift 4 + local config_settings=("$@") + + local common_args=( + --name "$extension_name" + --cluster-name "$cluster_name" + --resource-group "$cluster_resource_group" + --cluster-type connectedClusters + --version "$extension_version" + --release-train "$release_train" + --auto-upgrade-minor-version false + ) + + if az k8s-extension show "${common_args[@]}" >/dev/null 2>&1; then + info "Updating extension $extension_name ($extension_type)..." 
+ az k8s-extension update "${common_args[@]}" \ + --configuration-settings "${config_settings[@]}" \ + --yes \ + --output none + else + info "Creating extension $extension_name ($extension_type)..." + az k8s-extension create "${common_args[@]}" \ + --extension-type "$extension_type" \ + --configuration-settings "${config_settings[@]}" \ + --output none + fi +} + +# Defaults +tf_dir="$SCRIPT_DIR/$DEFAULT_TF_DIR" +cluster_name="${ARC_CLUSTER_NAME:-}" +cluster_resource_group="${ARC_RESOURCE_GROUP:-}" +storage_account_name="${STORAGE_ACCOUNT_NAME:-}" +storage_account_resource_group="${STORAGE_ACCOUNT_RESOURCE_GROUP:-}" +storage_scope="" +connectivity_mode="${ACSA_CONNECTIVITY_MODE:-direct}" +proxy_port="${ACSA_PROXY_PORT:-47011}" +config_preview=false +cert_manager_extension_name="${CERT_MANAGER_EXTENSION_NAME:-arc-cert-manager}" +cert_manager_extension_version="${CERT_MANAGER_EXTENSION_VERSION:-0.10.2}" +cert_manager_release_train="${CERT_MANAGER_RELEASE_TRAIN:-stable}" +principal_id_max_retries="${ACSA_PRINCIPAL_ID_MAX_RETRIES:-12}" +principal_id_retry_seconds="${ACSA_PRINCIPAL_ID_RETRY_SECONDS:-10}" + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) show_help; exit 0 ;; + -t|--tf-dir) tf_dir="$2"; shift 2 ;; + --cluster-name) cluster_name="$2"; shift 2 ;; + --cluster-resource-group) cluster_resource_group="$2"; shift 2 ;; + --storage-account) storage_account_name="$2"; shift 2 ;; + --storage-resource-group) storage_account_resource_group="$2"; shift 2 ;; + --connectivity-mode) connectivity_mode="$2"; shift 2 ;; + --proxy-port) proxy_port="$2"; shift 2 ;; + --config-preview) config_preview=true; shift ;; + *) fatal "Unknown option: $1" ;; + esac +done + +require_tools az terraform jq kubectl envsubst +require_az_extension k8s-extension +require_az_extension connectedk8s + +#------------------------------------------------------------------------------ +# Gather Configuration +#------------------------------------------------------------------------------ + +if 
[[ -f "$tf_dir/terraform.tfstate" ]]; then + info "Reading terraform outputs from $tf_dir..." + tf_output=$(read_terraform_outputs "$tf_dir") + + if [[ -z "$cluster_resource_group" ]]; then + cluster_resource_group=$(tf_get "$tf_output" "resource_group.value.name" "") + fi + + if [[ -z "$storage_account_name" ]]; then + storage_account_name=$(tf_get "$tf_output" "data_lake_storage_account.value.name" "") + storage_scope=$(tf_get "$tf_output" "data_lake_storage_account.value.id" "") + fi + + if [[ -z "$storage_account_name" ]]; then + storage_account_name=$(tf_get "$tf_output" "storage_account.value.name" "") + storage_scope=$(tf_get "$tf_output" "storage_account.value.id" "") + fi + + if [[ -n "$storage_scope" && -z "$storage_account_resource_group" ]]; then + storage_account_resource_group=$(echo "$storage_scope" | awk -F'/' '{for (i = 1; i <= NF; i++) if ($i == "resourceGroups") {print $(i+1); exit}}') + fi +else + warn "terraform.tfstate not found in $tf_dir; skipping terraform output discovery" +fi + +if [[ "$connectivity_mode" != "direct" && "$connectivity_mode" != "proxy" ]]; then + fatal "Invalid connectivity mode: $connectivity_mode (expected: direct|proxy)" +fi + +if [[ "$config_preview" == "true" ]]; then + section "Configuration Preview" + print_kv "TF Dir" "$tf_dir" + print_kv "Cluster Name" "${cluster_name:-}" + print_kv "Cluster RG" "${cluster_resource_group:-}" + print_kv "Storage Account" "${storage_account_name:-}" + print_kv "Storage RG" "${storage_account_resource_group:-}" + print_kv "Connectivity" "$connectivity_mode" + print_kv "Edge Namespace" "$EDGE_NAMESPACE" + print_kv "ACSA Extension" "$ACSA_EXTENSION_NAME@$ACSA_EXTENSION_VERSION ($ACSA_RELEASE_TRAIN)" + print_kv "Cert Extension" "$cert_manager_extension_name@$cert_manager_extension_version ($cert_manager_release_train)" + print_kv "PVC" "$ACSA_PVC_NAME ($ACSA_PVC_SIZE, $ACSA_STORAGE_CLASS)" + print_kv "Subvolume" "$SUBVOLUME_NAME:$SUBVOLUME_PATH" + print_kv "Blob Container" 
"$BLOB_CONTAINER_NAME" + print_kv "Apply Ingest Policy" "$ACSA_SHOULD_APPLY_INGEST_POLICY" + info "Config preview mode - exiting without changes" + exit 0 +fi + +[[ -n "$cluster_name" ]] || fatal "Cluster name is required (--cluster-name or ARC_CLUSTER_NAME)" +[[ -n "$cluster_resource_group" ]] || fatal "Cluster resource group is required (--cluster-resource-group or ARC_RESOURCE_GROUP)" +[[ -n "$storage_account_name" ]] || fatal "Storage account name is required (--storage-account or terraform output)" + +subscription_id=$(az account show --query id -o tsv) +if [[ -z "$storage_scope" ]]; then + storage_scope="/subscriptions/${subscription_id}/resourceGroups/${storage_account_resource_group:-$cluster_resource_group}/providers/Microsoft.Storage/storageAccounts/${storage_account_name}" +fi + +#------------------------------------------------------------------------------ +# Prepare Cluster Connectivity +#------------------------------------------------------------------------------ +section "Prepare Cluster Connectivity" + +proxy_pid="" +proxy_kubeconfig="" +proxy_log_file="" +render_dir="" +role_assignment_error_file="" +trap cleanup_proxy EXIT + +if [[ "$connectivity_mode" == "proxy" ]]; then + proxy_kubeconfig=$(mktemp) + proxy_log_file=$(mktemp) + start_arc_proxy "$proxy_kubeconfig" "$proxy_log_file" +else + verify_cluster_connectivity +fi + +ensure_namespace "$EDGE_NAMESPACE" + +#------------------------------------------------------------------------------ +# Install cert-manager and ACSA Extensions +#------------------------------------------------------------------------------ +section "Install cert-manager and ACSA Extensions" + +install_or_update_extension \ + "$cert_manager_extension_name" \ + "microsoft.certmanagement" \ + "$cert_manager_extension_version" \ + "$cert_manager_release_train" \ + "global.telemetry.enabled=true" +wait_for_extension_state "$cert_manager_extension_name" "Succeeded" + +install_or_update_extension \ + "$ACSA_EXTENSION_NAME" \ + 
"microsoft.arc.containerstorage" \ + "$ACSA_EXTENSION_VERSION" \ + "$ACSA_RELEASE_TRAIN" \ + "edgeStorageConfiguration.create=true" \ + "feature.diskStorageClass=$ACSA_DISK_STORAGE_CLASS" +wait_for_extension_state "$ACSA_EXTENSION_NAME" "Succeeded" + +#------------------------------------------------------------------------------ +# Assign Blob Role to ACSA Managed Identity +#------------------------------------------------------------------------------ +section "Assign Blob Role to ACSA Managed Identity" + +acsa_principal_id="" +for ((attempt = 1; attempt <= principal_id_max_retries; attempt++)); do + acsa_principal_id=$(az k8s-extension show \ + --name "$ACSA_EXTENSION_NAME" \ + --cluster-name "$cluster_name" \ + --resource-group "$cluster_resource_group" \ + --cluster-type connectedClusters \ + --query identity.principalId -o tsv 2>/dev/null || true) + + if [[ -n "$acsa_principal_id" && "$acsa_principal_id" != "null" ]]; then + break + fi + + info "Waiting for ACSA principal ID ($attempt/$principal_id_max_retries)..." 
+ sleep "$principal_id_retry_seconds" +done + +[[ -n "$acsa_principal_id" && "$acsa_principal_id" != "null" ]] || fatal "ACSA managed identity principal ID is unavailable" + +role_assignment_error_file=$(mktemp) +if az role assignment create \ + --assignee-object-id "$acsa_principal_id" \ + --assignee-principal-type ServicePrincipal \ + --role "Storage Blob Data Owner" \ + --scope "$storage_scope" \ + --output none 2>"$role_assignment_error_file"; then + info "Storage Blob Data Owner role assigned" +else + if grep -qi "already exists" "$role_assignment_error_file"; then + info "Storage Blob Data Owner role assignment already exists" + else + cat "$role_assignment_error_file" >&2 + fatal "Failed to assign Storage Blob Data Owner role" + fi +fi + +#------------------------------------------------------------------------------ +# Create Container and Apply ACSA Manifests +#------------------------------------------------------------------------------ +section "Create Container and Apply ACSA Manifests" + +az storage container create \ + --account-name "$storage_account_name" \ + --name "$BLOB_CONTAINER_NAME" \ + --auth-mode login \ + --output none + +render_dir=$(mktemp -d) + +export EDGE_NAMESPACE +export ACSA_STORAGE_CLASS +export ACSA_PVC_NAME +export ACSA_PVC_SIZE +export STORAGE_ACCOUNT_NAME="$storage_account_name" +export BLOB_CONTAINER_NAME +export SUBVOLUME_NAME +export SUBVOLUME_PATH +export ACSA_INGEST_POLICY +export ACSA_INGEST_POLICY_NAME +export ACSA_INGEST_ORDER +export ACSA_INGEST_MIN_DELAY_SEC +export ACSA_EVICTION_ORDER +export ACSA_EVICTION_MIN_DELAY_SEC + +envsubst < "$ARC_DIR/acsa-pvc.yaml" > "$render_dir/acsa-pvc.yaml" +envsubst < "$ARC_DIR/acsa-edge-subvolume.yaml" > "$render_dir/acsa-edge-subvolume.yaml" +kubectl apply -f "$render_dir/acsa-pvc.yaml" +kubectl apply -f "$render_dir/acsa-edge-subvolume.yaml" + +if [[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]]; then + envsubst < "$ARC_DIR/acsa-ingest-policy.yaml" > 
"$render_dir/acsa-ingest-policy.yaml" + kubectl apply -f "$render_dir/acsa-ingest-policy.yaml" +fi + +kubectl -n "$EDGE_NAMESPACE" wait --for=condition=Bound "pvc/$ACSA_PVC_NAME" --timeout=180s + +if kubectl -n "$EDGE_NAMESPACE" get "edgevolumes/$ACSA_PVC_NAME" >/dev/null 2>&1; then + kubectl -n "$EDGE_NAMESPACE" wait --for=jsonpath='{.status.state}'=deployed "edgevolumes/$ACSA_PVC_NAME" --timeout=180s +else + warn "EdgeVolume $ACSA_PVC_NAME not found yet; skipping deployed-state wait" +fi + +#------------------------------------------------------------------------------ +# Deployment Summary +#------------------------------------------------------------------------------ +section "Deployment Summary" +print_kv "Cluster" "$cluster_name" +print_kv "Cluster RG" "$cluster_resource_group" +print_kv "Connectivity" "$connectivity_mode" +print_kv "Namespace" "$EDGE_NAMESPACE" +print_kv "ACSA Extension" "$ACSA_EXTENSION_NAME" +print_kv "Storage Account" "$storage_account_name" +print_kv "Blob Container" "$BLOB_CONTAINER_NAME" +print_kv "PVC" "$ACSA_PVC_NAME" +print_kv "Subvolume" "$SUBVOLUME_NAME" +print_kv "Ingest Policy" "$([[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]] && echo "$ACSA_INGEST_POLICY_NAME" || echo "default")" + +info "ACSA deployment complete" \ No newline at end of file From 163565b4e85937912b14bfb5f7b7cbf7873687c4 Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Fri, 10 Apr 2026 18:30:06 +0000 Subject: [PATCH 2/7] fix(pipeline): ensure newline at end of defaults.conf and deploy-acsa.sh for proper script execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit πŸ”§ - Generated by Copilot --- data-pipeline/setup/defaults.conf | 2 +- data-pipeline/setup/deploy-acsa.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data-pipeline/setup/defaults.conf b/data-pipeline/setup/defaults.conf index 54a88807..b6a4e461 100644 --- a/data-pipeline/setup/defaults.conf +++ 
b/data-pipeline/setup/defaults.conf @@ -30,4 +30,4 @@ ACSA_INGEST_POLICY_NAME="${ACSA_INGEST_POLICY_NAME:-ros2-bag-ingest-policy}" ACSA_INGEST_ORDER="${ACSA_INGEST_ORDER:-oldest-first}" ACSA_INGEST_MIN_DELAY_SEC="${ACSA_INGEST_MIN_DELAY_SEC:-30}" ACSA_EVICTION_ORDER="${ACSA_EVICTION_ORDER:-unordered}" -ACSA_EVICTION_MIN_DELAY_SEC="${ACSA_EVICTION_MIN_DELAY_SEC:-600}" \ No newline at end of file +ACSA_EVICTION_MIN_DELAY_SEC="${ACSA_EVICTION_MIN_DELAY_SEC:-600}" diff --git a/data-pipeline/setup/deploy-acsa.sh b/data-pipeline/setup/deploy-acsa.sh index e5cd409f..bf6f8c21 100755 --- a/data-pipeline/setup/deploy-acsa.sh +++ b/data-pipeline/setup/deploy-acsa.sh @@ -391,4 +391,4 @@ print_kv "PVC" "$ACSA_PVC_NAME" print_kv "Subvolume" "$SUBVOLUME_NAME" print_kv "Ingest Policy" "$([[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]] && echo "$ACSA_INGEST_POLICY_NAME" || echo "default")" -info "ACSA deployment complete" \ No newline at end of file +info "ACSA deployment complete" From 13e94eb6b6ec99e183cd6d39d21c566391240eee Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Fri, 10 Apr 2026 20:48:17 +0000 Subject: [PATCH 3/7] fix(pipeline): update PVC wait command to use jsonpath for status check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit πŸ”§ - Generated by Copilot --- data-pipeline/setup/deploy-acsa.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-pipeline/setup/deploy-acsa.sh b/data-pipeline/setup/deploy-acsa.sh index bf6f8c21..86307a78 100755 --- a/data-pipeline/setup/deploy-acsa.sh +++ b/data-pipeline/setup/deploy-acsa.sh @@ -368,7 +368,7 @@ if [[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]]; then kubectl apply -f "$render_dir/acsa-ingest-policy.yaml" fi -kubectl -n "$EDGE_NAMESPACE" wait --for=condition=Bound "pvc/$ACSA_PVC_NAME" --timeout=180s +kubectl -n "$EDGE_NAMESPACE" wait --for=jsonpath='{.status.phase}'=Bound "pvc/$ACSA_PVC_NAME" --timeout=180s if kubectl -n "$EDGE_NAMESPACE" get 
"edgevolumes/$ACSA_PVC_NAME" >/dev/null 2>&1; then kubectl -n "$EDGE_NAMESPACE" wait --for=jsonpath='{.status.state}'=deployed "edgevolumes/$ACSA_PVC_NAME" --timeout=180s From 3d1dee29488a06223e7b20ee2bffbc0ff6db94b4 Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Mon, 13 Apr 2026 18:38:13 +0000 Subject: [PATCH 4/7] feat(pipeline): replace EdgeSubvolume and EdgeIngestPolicy with IngestSubvolume configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - remove deprecated EdgeSubvolume and EdgeIngestPolicy YAML files - add new IngestSubvolume YAML file with updated specifications - update deploy script to reflect changes in ingest policy handling - modify defaults configuration for new ingest behavior πŸ”„ - Generated by Copilot --- data-pipeline/arc/acsa-edge-subvolume.yaml | 14 -------------- data-pipeline/arc/acsa-ingest-policy.yaml | 13 ------------- data-pipeline/arc/acsa-ingest-subvolume.yaml | 20 ++++++++++++++++++++ data-pipeline/setup/defaults.conf | 8 +++----- data-pipeline/setup/deploy-acsa.sh | 20 +++++++++----------- 5 files changed, 32 insertions(+), 43 deletions(-) delete mode 100644 data-pipeline/arc/acsa-edge-subvolume.yaml delete mode 100644 data-pipeline/arc/acsa-ingest-policy.yaml create mode 100644 data-pipeline/arc/acsa-ingest-subvolume.yaml diff --git a/data-pipeline/arc/acsa-edge-subvolume.yaml b/data-pipeline/arc/acsa-edge-subvolume.yaml deleted file mode 100644 index d6d618b7..00000000 --- a/data-pipeline/arc/acsa-edge-subvolume.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -apiVersion: arccontainerstorage.azure.net/v1 -kind: EdgeSubvolume -metadata: - name: ${SUBVOLUME_NAME} - namespace: ${EDGE_NAMESPACE} -spec: - edgevolume: ${ACSA_PVC_NAME} - path: ${SUBVOLUME_PATH} - auth: - authType: MANAGED_IDENTITY - storageaccountendpoint: "https://${STORAGE_ACCOUNT_NAME}.blob.core.windows.net/" - container: ${BLOB_CONTAINER_NAME} - ingestPolicy: ${ACSA_INGEST_POLICY} diff --git 
a/data-pipeline/arc/acsa-ingest-policy.yaml b/data-pipeline/arc/acsa-ingest-policy.yaml deleted file mode 100644 index a655be30..00000000 --- a/data-pipeline/arc/acsa-ingest-policy.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -apiVersion: arccontainerstorage.azure.net/v1 -kind: EdgeIngestPolicy -metadata: - name: ${ACSA_INGEST_POLICY_NAME} - namespace: ${EDGE_NAMESPACE} -spec: - ingest: - order: ${ACSA_INGEST_ORDER} - minDelaySec: ${ACSA_INGEST_MIN_DELAY_SEC} - eviction: - order: ${ACSA_EVICTION_ORDER} - minDelaySec: ${ACSA_EVICTION_MIN_DELAY_SEC} diff --git a/data-pipeline/arc/acsa-ingest-subvolume.yaml b/data-pipeline/arc/acsa-ingest-subvolume.yaml new file mode 100644 index 00000000..7d69d5db --- /dev/null +++ b/data-pipeline/arc/acsa-ingest-subvolume.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: arccontainerstorage.azure.net/v1 +kind: IngestSubvolume +metadata: + name: ${SUBVOLUME_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + edgevolume: ${ACSA_PVC_NAME} + path: ${SUBVOLUME_PATH} + authentication: + authType: MANAGED_IDENTITY + storageAccountEndpoint: "https://${STORAGE_ACCOUNT_NAME}.blob.core.windows.net/" + containerName: ${BLOB_CONTAINER_NAME} + ingest: + order: ${ACSA_INGEST_ORDER} + minDelaySec: ${ACSA_INGEST_MIN_DELAY_SEC} + eviction: + order: ${ACSA_EVICTION_ORDER} + minDelaySec: ${ACSA_EVICTION_MIN_DELAY_SEC} + onDelete: ${ACSA_ON_DELETE} diff --git a/data-pipeline/setup/defaults.conf b/data-pipeline/setup/defaults.conf index b6a4e461..b003403b 100644 --- a/data-pipeline/setup/defaults.conf +++ b/data-pipeline/setup/defaults.conf @@ -9,7 +9,7 @@ EDGE_NAMESPACE="${EDGE_NAMESPACE:-data-pipeline}" # ACSA Extension Configuration ACSA_EXTENSION_NAME="${ACSA_EXTENSION_NAME:-azure-arc-containerstorage}" -ACSA_EXTENSION_VERSION="${ACSA_EXTENSION_VERSION:-2.6.0}" +ACSA_EXTENSION_VERSION="${ACSA_EXTENSION_VERSION:-2.11.2}" ACSA_RELEASE_TRAIN="${ACSA_RELEASE_TRAIN:-stable}" ACSA_DISK_STORAGE_CLASS="${ACSA_DISK_STORAGE_CLASS:-default,local-path}" @@ -22,12 +22,10 @@ 
ACSA_PVC_SIZE="${ACSA_PVC_SIZE:-50Gi}" BLOB_CONTAINER_NAME="${BLOB_CONTAINER_NAME:-datasets}" SUBVOLUME_NAME="${SUBVOLUME_NAME:-recordings}" SUBVOLUME_PATH="${SUBVOLUME_PATH:-recordings}" -ACSA_INGEST_POLICY="${ACSA_INGEST_POLICY:-edgeingestpolicy-default}" -# Optional custom EdgeIngestPolicy configuration -ACSA_SHOULD_APPLY_INGEST_POLICY="${ACSA_SHOULD_APPLY_INGEST_POLICY:-false}" -ACSA_INGEST_POLICY_NAME="${ACSA_INGEST_POLICY_NAME:-ros2-bag-ingest-policy}" +# IngestSubvolume synchronization behavior ACSA_INGEST_ORDER="${ACSA_INGEST_ORDER:-oldest-first}" ACSA_INGEST_MIN_DELAY_SEC="${ACSA_INGEST_MIN_DELAY_SEC:-30}" ACSA_EVICTION_ORDER="${ACSA_EVICTION_ORDER:-unordered}" ACSA_EVICTION_MIN_DELAY_SEC="${ACSA_EVICTION_MIN_DELAY_SEC:-600}" +ACSA_ON_DELETE="${ACSA_ON_DELETE:-trigger-immediate-ingest}" diff --git a/data-pipeline/setup/deploy-acsa.sh b/data-pipeline/setup/deploy-acsa.sh index 86307a78..1d18b220 100755 --- a/data-pipeline/setup/deploy-acsa.sh +++ b/data-pipeline/setup/deploy-acsa.sh @@ -231,7 +231,9 @@ if [[ "$config_preview" == "true" ]]; then print_kv "PVC" "$ACSA_PVC_NAME ($ACSA_PVC_SIZE, $ACSA_STORAGE_CLASS)" print_kv "Subvolume" "$SUBVOLUME_NAME:$SUBVOLUME_PATH" print_kv "Blob Container" "$BLOB_CONTAINER_NAME" - print_kv "Apply Ingest Policy" "$ACSA_SHOULD_APPLY_INGEST_POLICY" + print_kv "Ingest" "$ACSA_INGEST_ORDER (${ACSA_INGEST_MIN_DELAY_SEC}s)" + print_kv "Eviction" "$ACSA_EVICTION_ORDER (${ACSA_EVICTION_MIN_DELAY_SEC}s)" + print_kv "On Delete" "$ACSA_ON_DELETE" info "Config preview mode - exiting without changes" exit 0 fi @@ -351,22 +353,16 @@ export STORAGE_ACCOUNT_NAME="$storage_account_name" export BLOB_CONTAINER_NAME export SUBVOLUME_NAME export SUBVOLUME_PATH -export ACSA_INGEST_POLICY -export ACSA_INGEST_POLICY_NAME export ACSA_INGEST_ORDER export ACSA_INGEST_MIN_DELAY_SEC export ACSA_EVICTION_ORDER export ACSA_EVICTION_MIN_DELAY_SEC +export ACSA_ON_DELETE envsubst < "$ARC_DIR/acsa-pvc.yaml" > "$render_dir/acsa-pvc.yaml" -envsubst < 
"$ARC_DIR/acsa-edge-subvolume.yaml" > "$render_dir/acsa-edge-subvolume.yaml" +envsubst < "$ARC_DIR/acsa-ingest-subvolume.yaml" > "$render_dir/acsa-ingest-subvolume.yaml" kubectl apply -f "$render_dir/acsa-pvc.yaml" -kubectl apply -f "$render_dir/acsa-edge-subvolume.yaml" - -if [[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]]; then - envsubst < "$ARC_DIR/acsa-ingest-policy.yaml" > "$render_dir/acsa-ingest-policy.yaml" - kubectl apply -f "$render_dir/acsa-ingest-policy.yaml" -fi +kubectl apply -f "$render_dir/acsa-ingest-subvolume.yaml" kubectl -n "$EDGE_NAMESPACE" wait --for=jsonpath='{.status.phase}'=Bound "pvc/$ACSA_PVC_NAME" --timeout=180s @@ -389,6 +385,8 @@ print_kv "Storage Account" "$storage_account_name" print_kv "Blob Container" "$BLOB_CONTAINER_NAME" print_kv "PVC" "$ACSA_PVC_NAME" print_kv "Subvolume" "$SUBVOLUME_NAME" -print_kv "Ingest Policy" "$([[ "$ACSA_SHOULD_APPLY_INGEST_POLICY" == "true" ]] && echo "$ACSA_INGEST_POLICY_NAME" || echo "default")" +print_kv "Ingest" "$ACSA_INGEST_ORDER (${ACSA_INGEST_MIN_DELAY_SEC}s)" +print_kv "Eviction" "$ACSA_EVICTION_ORDER (${ACSA_EVICTION_MIN_DELAY_SEC}s)" +print_kv "On Delete" "$ACSA_ON_DELETE" info "ACSA deployment complete" From 58e607559e537bbdc3f590ebaaf17db3e096d712 Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Mon, 13 Apr 2026 20:15:17 +0000 Subject: [PATCH 5/7] fix(pipeline): improve cleanup_proxy function to ensure proper termination of proxy processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit πŸ”§ - Generated by Copilot --- data-pipeline/setup/deploy-acsa.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data-pipeline/setup/deploy-acsa.sh b/data-pipeline/setup/deploy-acsa.sh index 1d18b220..5d1db981 100755 --- a/data-pipeline/setup/deploy-acsa.sh +++ b/data-pipeline/setup/deploy-acsa.sh @@ -96,7 +96,9 @@ start_arc_proxy() { cleanup_proxy() { if [[ -n "${proxy_pid:-}" ]] && kill -0 "$proxy_pid" >/dev/null 2>&1; then + pkill -P "$proxy_pid" 
>/dev/null 2>&1 || true kill "$proxy_pid" >/dev/null 2>&1 || true + wait "$proxy_pid" 2>/dev/null || true fi if [[ -n "${proxy_kubeconfig:-}" && -f "$proxy_kubeconfig" ]]; then From da08b710fd27af4da8cc1d91faf2d8217cb3afcf Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Mon, 13 Apr 2026 20:23:44 +0000 Subject: [PATCH 6/7] feat(settings): add 'pkill' to general technical terms --- .cspell/general-technical.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.cspell/general-technical.txt b/.cspell/general-technical.txt index 96fc0265..e77448b6 100644 --- a/.cspell/general-technical.txt +++ b/.cspell/general-technical.txt @@ -953,6 +953,7 @@ pipelines pitj pivottable pkgs +pkill platform platformops platforms From ee6907f521f874e80ce8a673d4eecce9e5b7b3c2 Mon Sep 17 00:00:00 2001 From: Andrew Nguyen Date: Mon, 13 Apr 2026 20:51:18 +0000 Subject: [PATCH 7/7] feat(docs): add ACSA setup guide and update README for Azure Container Storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - document ACSA deployment and configuration for edge storage - include manifest templates and prerequisites for setup - enhance existing README with ACSA-related information πŸ“„ - Generated by Copilot --- data-pipeline/arc/README.md | 10 + data-pipeline/setup/README.md | 9 + docs/data-pipeline/README.md | 1 + docs/data-pipeline/acsa-setup.md | 360 +++++++++++++++++++++++++++++++ 4 files changed, 380 insertions(+) create mode 100644 docs/data-pipeline/acsa-setup.md diff --git a/data-pipeline/arc/README.md b/data-pipeline/arc/README.md index 257e7b26..8e9f03d8 100644 --- a/data-pipeline/arc/README.md +++ b/data-pipeline/arc/README.md @@ -9,3 +9,13 @@ Kubernetes manifests and configuration for Arc-connected edge data pipeline comp | K8s manifests | Deployments, services, and config maps for recording workloads | | RBAC policies | Service accounts and role bindings for edge agents | | Flux configuration | GitOps sync definitions for automated edge 
deployment | +| ACSA manifests | PVC and IngestSubvolume templates for cloud-backed edge storage | + +## πŸ“„ ACSA Manifests + +| File | Description | +|-------------------------------|-------------------------------------------------------| +| `acsa-pvc.yaml` | ReadWriteMany PVC backed by ACSA `cloud-backed-sc` | +| `acsa-ingest-subvolume.yaml` | IngestSubvolume CRD defining Blob sync policy | + +These templates use `envsubst` variables rendered by `data-pipeline/setup/deploy-acsa.sh`. See the [ACSA setup guide](../../docs/data-pipeline/acsa-setup.md) for full deployment instructions. diff --git a/data-pipeline/setup/README.md b/data-pipeline/setup/README.md index e117e498..01b0642e 100644 --- a/data-pipeline/setup/README.md +++ b/data-pipeline/setup/README.md @@ -9,3 +9,12 @@ Deployment scripts for Arc-connected edge agents that run the ROS 2 recording se | Arc agent provisioning | Connect edge devices to Azure Arc-enabled Kubernetes | | Connectivity validation | Verify cloud connectivity and service endpoints | | Runtime configuration | Deploy recording configuration and service dependencies | +| ACSA deployment | Install Azure Container Storage for Arc and configure Blob sync | + +## πŸ“œ Scripts + +| Script | Purpose | +|--------------------|-----------------------------------------------------------------------------------| +| `deploy-acsa.sh` | Install cert-manager + ACSA extensions, assign Blob role, apply PVC/subvolume manifests | + +See the [ACSA setup guide](../../docs/data-pipeline/acsa-setup.md) for deployment instructions. 
diff --git a/docs/data-pipeline/README.md b/docs/data-pipeline/README.md index 46512bf2..8c5fbe31 100644 --- a/docs/data-pipeline/README.md +++ b/docs/data-pipeline/README.md @@ -20,6 +20,7 @@ Robot-to-cloud data capture pipeline for recording, compressing, and uploading r | Guide | Description | |--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------| +| [ACSA Setup for ROS 2 Bag Sync](acsa-setup.md) | Deploy Azure Container Storage for Arc to sync ROS 2 bag files from edge clusters to Blob Storage | | [Chunking and Compression Configuration](chunking-compression-config.md) | Configure bag chunking thresholds and zstd compression for ROS 2 edge recording on Jetson devices | ## πŸ—οΈ Architecture diff --git a/docs/data-pipeline/acsa-setup.md b/docs/data-pipeline/acsa-setup.md new file mode 100644 index 00000000..4f547d24 --- /dev/null +++ b/docs/data-pipeline/acsa-setup.md @@ -0,0 +1,360 @@ +--- +sidebar_position: 2 +title: ACSA Setup for ROS 2 Bag Sync +description: Deploy Azure Container Storage enabled by Azure Arc (ACSA) on edge clusters to sync ROS 2 bag files to Azure Blob Storage +author: Microsoft Robotics-AI Team +ms.date: 2026-04-13 +ms.topic: how-to +keywords: + - acsa + - azure container storage + - arc storage + - ros2 bag + - blob sync + - edge storage + - ingest subvolume +--- + +Deploy Azure Container Storage enabled by Azure Arc (ACSA) on Arc-connected edge clusters to automatically sync ROS 2 bag files to Azure Blob Storage. ACSA provides cloud-backed persistent volumes that handle ingest, caching, and eviction transparently β€” recording pods write to a local PVC and files sync to Blob Storage without application-level upload logic. 
+ +## πŸ—οΈ Architecture + +```text +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Edge Cluster (Arc-connected) β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ ROS 2 β”‚ β”‚ ACSA Extension β”‚ β”‚ +β”‚ β”‚ Recording Pod│───▢│ (Edge Volume) β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ writes to β”‚ β”‚ IngestSubvolume β”‚ β”‚ +β”‚ β”‚ /recording β”‚ β”‚ controller syncs β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ oldest-first β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β–Ό β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ PVC β”‚ β”‚ β”‚ +β”‚ β”‚ recording- β”‚ β”‚ β”‚ +β”‚ β”‚ data (50Gi) β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ HTTPS + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Azure Blob Storage β”‚ + β”‚ datasets/recordings/ β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +Recording pods mount the `recording-data` PVC and write bag files to it. The ACSA `IngestSubvolume` controller detects new files and syncs them to a Blob Storage container using managed identity authentication. Local cache eviction removes files after a configurable delay, freeing disk space for continued recording. 
+ +## 📋 Prerequisites + +| Requirement | Details | +|--------------------------------|--------------------------------------------------------------------------------------| +| Azure Arc-connected K8s cluster | Edge cluster registered with Azure Arc (`az connectedk8s show`) | +| Azure CLI 2.60+ | With `k8s-extension` and `connectedk8s` extensions | +| Terraform outputs | Infrastructure deployed via `infrastructure/terraform/` with a storage account | +| kubectl + envsubst | For manifest rendering and application | +| Azure RBAC | Contributor on the Arc cluster resource group; Storage Blob Data Owner on the storage account | +| Network connectivity | Direct kubectl access or Arc proxy for private clusters | + +> [!NOTE] +> The deploy script automatically installs missing Azure CLI extensions (`k8s-extension`, `connectedk8s`). + +## 🚀 Quick Start + +```bash +cd data-pipeline/setup + +# Preview configuration without making changes +./deploy-acsa.sh --config-preview \ + --cluster-name <cluster-name> \ + --cluster-resource-group <resource-group> \ + --storage-account <storage-account> + +# Deploy ACSA with Terraform auto-discovery +./deploy-acsa.sh \ + --cluster-name <cluster-name> \ + --cluster-resource-group <resource-group> \ + --storage-account <storage-account> +``` + +The script reads storage account details from `infrastructure/terraform/terraform.tfstate`. Override any value via CLI arguments or environment variables. 
+ +## ⚙️ Configuration + +### Script Arguments + +| Argument | Environment Variable | Default | Description | +|-------------------------------|-----------------------------------|------------------------------------------|--------------------------------------------| +| `--cluster-name` | `ARC_CLUSTER_NAME` | (required) | Arc-connected cluster name | +| `--cluster-resource-group` | `ARC_RESOURCE_GROUP` | (required) | Resource group of the Arc cluster | +| `-t, --tf-dir` | `DEFAULT_TF_DIR` | `../../infrastructure/terraform` | Terraform directory for output discovery | +| `--storage-account` | `STORAGE_ACCOUNT_NAME` | Auto-discovered from Terraform | Storage account name override | +| `--storage-resource-group` | `STORAGE_ACCOUNT_RESOURCE_GROUP` | Same as cluster resource group | Storage account resource group | +| `--connectivity-mode` | `ACSA_CONNECTIVITY_MODE` | `direct` | `direct` or `proxy` | +| `--proxy-port` | `ACSA_PROXY_PORT` | `47011` | Arc proxy port (proxy mode only) | +| `--config-preview` | — | — | Print configuration and exit | + +### Defaults Configuration + +Central defaults live in `data-pipeline/setup/defaults.conf`. Override any value via environment variables before running the script. 
+ +| Variable | Default | Description | +|--------------------------------|---------------------------|----------------------------------------------| +| `ACSA_EXTENSION_VERSION` | `2.11.2` | ACSA Arc extension version | +| `ACSA_RELEASE_TRAIN` | `stable` | Extension release train | +| `ACSA_DISK_STORAGE_CLASS` | `default,local-path` | Backing disk storage classes | +| `ACSA_PVC_NAME` | `recording-data` | PVC name for recording volume | +| `ACSA_PVC_SIZE` | `50Gi` | PVC storage request | +| `ACSA_STORAGE_CLASS` | `cloud-backed-sc` | ACSA storage class name | +| `BLOB_CONTAINER_NAME` | `datasets` | Target Blob Storage container | +| `SUBVOLUME_NAME` | `recordings` | IngestSubvolume resource name | +| `SUBVOLUME_PATH` | `recordings` | Path prefix within the Blob container | +| `ACSA_INGEST_ORDER` | `oldest-first` | File ingest order (`oldest-first`) | +| `ACSA_INGEST_MIN_DELAY_SEC` | `30` | Minimum time (seconds) before ingesting a file | +| `ACSA_EVICTION_ORDER` | `unordered` | Cache eviction order | +| `ACSA_EVICTION_MIN_DELAY_SEC` | `600` | Minimum time (seconds) before evicting cached files | +| `ACSA_ON_DELETE` | `trigger-immediate-ingest` | Behavior when the subvolume is deleted | +| `EDGE_NAMESPACE` | `data-pipeline` | Kubernetes namespace for ACSA resources | + +### Sync Behavior + +ACSA `IngestSubvolume` controls how files move from edge to cloud: + +| Parameter | Default | Behavior | +|------------------|-----------------------------|---------------------------------------------------------| +| Ingest order | `oldest-first` | Oldest files sync first, preserving recording chronology | +| Ingest delay | 30 seconds | Wait before syncing — avoids uploading files still being written | +| Eviction delay | 600 seconds (10 minutes) | Keep cached files locally after upload for re-reads | +| On delete | `trigger-immediate-ingest` | Upload all remaining data immediately when the subvolume is deleted | + +## 📦 Deployment Steps + +The `deploy-acsa.sh` script executes 
these 
steps in order: + +1. Read Terraform outputs to discover the storage account and resource group +2. Validate cluster connectivity (direct kubectl or Arc proxy) +3. Create the `data-pipeline` namespace +4. Install the `arc-cert-manager` extension (ACSA dependency) +5. Wait for cert-manager to reach `Succeeded` state +6. Install the `azure-arc-containerstorage` extension +7. Wait for ACSA extension to reach `Succeeded` state +8. Retrieve the ACSA managed identity principal ID +9. Assign `Storage Blob Data Owner` role to the ACSA identity on the storage account +10. Create the `datasets` Blob container +11. Render and apply the PVC and IngestSubvolume manifests +12. Wait for the PVC to bind and EdgeVolume to deploy + +## 🔌 Connectivity Modes + +### Direct Mode (Default) + +Use when `kubectl` can reach the cluster API server directly — via VPN, public endpoint, or local network. + +```bash +./deploy-acsa.sh \ + --cluster-name my-edge-cluster \ + --cluster-resource-group rg-edge \ + --storage-account mystorageaccount +``` + +### Proxy Mode + +Use when the cluster API server is unreachable from the dev machine. The script starts an Arc proxy tunnel automatically and cleans it up on exit. + +```bash +./deploy-acsa.sh \ + --connectivity-mode proxy \ + --cluster-name my-edge-cluster \ + --cluster-resource-group rg-edge \ + --storage-account mystorageaccount +``` + +> [!NOTE] +> Arc proxy requires the `connectedk8s` CLI extension and an authenticated Azure session. The proxy creates a temporary kubeconfig and listens on port 47011 by default. + +## 📄 Manifest Templates + +Two Kubernetes manifest templates in `data-pipeline/arc/` are rendered using `envsubst` during deployment. + +### PVC Template (`acsa-pvc.yaml`) + +Creates a `ReadWriteMany` PersistentVolumeClaim backed by the ACSA `cloud-backed-sc` storage class. 
+ +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ${ACSA_PVC_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: ${ACSA_PVC_SIZE} + storageClassName: ${ACSA_STORAGE_CLASS} +``` + +### IngestSubvolume Template (`acsa-ingest-subvolume.yaml`) + +Defines the sync policy between the edge volume and Blob Storage. + +```yaml +apiVersion: arccontainerstorage.azure.net/v1 +kind: IngestSubvolume +metadata: + name: ${SUBVOLUME_NAME} + namespace: ${EDGE_NAMESPACE} +spec: + edgevolume: ${ACSA_PVC_NAME} + path: ${SUBVOLUME_PATH} + authentication: + authType: MANAGED_IDENTITY + storageAccountEndpoint: "https://${STORAGE_ACCOUNT_NAME}.blob.core.windows.net/" + containerName: ${BLOB_CONTAINER_NAME} + ingest: + order: ${ACSA_INGEST_ORDER} + minDelaySec: ${ACSA_INGEST_MIN_DELAY_SEC} + eviction: + order: ${ACSA_EVICTION_ORDER} + minDelaySec: ${ACSA_EVICTION_MIN_DELAY_SEC} + onDelete: ${ACSA_ON_DELETE} +``` + +## πŸ” Verification + +After deployment, verify the resources are healthy: + +```bash +# Check PVC is bound +kubectl -n data-pipeline get pvc recording-data +# Expected: STATUS = Bound + +# Check EdgeVolume is deployed +kubectl -n data-pipeline get edgevolumes recording-data +# Expected: STATE = deployed + +# Check IngestSubvolume exists +kubectl -n data-pipeline get ingestsubvolumes recordings + +# Check ACSA extension status +az k8s-extension show \ + --name azure-arc-containerstorage \ + --cluster-name \ + --resource-group \ + --cluster-type connectedClusters \ + --query provisioningState -o tsv +# Expected: Succeeded + +# Verify blob container exists +az storage container show \ + --account-name \ + --name datasets \ + --auth-mode login \ + --query name -o tsv +# Expected: datasets +``` + +### Test Sync + +Write a test file to the PVC and confirm it appears in Blob Storage: + +```bash +# Create a test pod that writes to the PVC +kubectl -n data-pipeline run acsa-test \ + --image=busybox \ + 
--restart=Never \ + --overrides='{ + "spec": { + "containers": [{ + "name": "acsa-test", + "image": "busybox", + "command": ["sh", "-c", "echo test > /data/test.txt && sleep 60"], + "volumeMounts": [{"name": "recording", "mountPath": "/data"}] + }], + "volumes": [{ + "name": "recording", + "persistentVolumeClaim": {"claimName": "recording-data"} + }] + } + }' + +# Wait for ingest delay (30s default), then check Blob Storage +az storage blob list \ + --account-name <storage-account> \ + --container-name datasets \ + --prefix recordings/ \ + --auth-mode login \ + --query "[].name" -o tsv + +# Clean up test pod +kubectl -n data-pipeline delete pod acsa-test +``` + +## 🔧 Troubleshooting + +### PVC Stuck in Pending + +The ACSA extension may not have finished provisioning the storage class. + +```bash +# Check storage classes +kubectl get storageclass cloud-backed-sc + +# Check ACSA extension pods +kubectl -n azure-arc-containerstorage get pods + +# Check extension events +kubectl -n azure-arc-containerstorage get events --sort-by='.lastTimestamp' +``` + +### Extension Provisioning Failed + +```bash +# View extension details +az k8s-extension show \ + --name azure-arc-containerstorage \ + --cluster-name <cluster-name> \ + --resource-group <resource-group> \ + --cluster-type connectedClusters \ + --query '{state: provisioningState, error: errorInfo}' +``` + +### Files Not Syncing + +1. Confirm the IngestSubvolume exists and has the correct storage account endpoint +2. Verify the ACSA managed identity has `Storage Blob Data Owner` on the storage account +3. 
Check that files are older than `ACSA_INGEST_MIN_DELAY_SEC` (30s default) + +```bash +# Check ACSA identity role assignment +az role assignment list \ + --scope "/subscriptions/<subscription-id>/resourceGroups/<resource-group>/providers/Microsoft.Storage/storageAccounts/<storage-account>" \ + --query "[?roleDefinitionName=='Storage Blob Data Owner'].{principal:principalId, role:roleDefinitionName}" \ + -o table +``` + +### Arc Proxy Connection Failures + +```bash +# Verify Arc agent is connected +az connectedk8s show \ + --name <cluster-name> \ + --resource-group <resource-group> \ + --query connectivityStatus -o tsv +# Expected: Connected + +# Check port availability +lsof -i :47011 +``` + +## 📚 Related Documentation + +| Resource | Description | +|----------|-------------| +| [Chunking and Compression Configuration](chunking-compression-config.md) | ROS 2 bag chunking and compression settings for edge recording | +| [Azure Container Storage enabled by Azure Arc](https://learn.microsoft.com/azure/azure-arc/container-storage/) | Microsoft documentation for ACSA | +| [IngestSubvolume specification](https://learn.microsoft.com/azure/azure-arc/container-storage/cloud-ingest-edge-volume-configuration) | CRD reference for `IngestSubvolume` |