# .github/workflows/clean-ecs-volume.yml
#
# Manually triggered workflow that empties the EFS file system backing the
# "persistent-volume" volume of the ECS service <ENVIRONMENT>-<SERVICE_NAME>.
#
# Flow:
#   1. Derive cluster / service / container names from the workflow env.
#   2. Read the service's task definition, mount path, subnet and security
#      group.
#   3. Read the EFS file system ID and IAM roles from that task definition.
#   4. Register a throw-away Fargate task definition that mounts the same EFS
#      file system, run it once to delete everything, and wait for it to stop.
#   5. Write a summary of the run to the job summary.
#
# WARNING: destructive. Every file under the root of the EFS file system is
# removed.
name: Clean ECS Volume

on:
  workflow_dispatch:

env:
  AWS_REGION: eu-central-1
  ENVIRONMENT: dev
  SERVICE_NAME: zebra

jobs:
  clean-volume:
    name: Clean ECS Volume
    runs-on: ubuntu-latest
    permissions:
      # NOTE(review): id-token is only consumed by OIDC-based authentication;
      # this workflow authenticates with static access keys. Confirm whether
      # this permission can be dropped.
      id-token: write
      contents: read
    defaults:
      run:
        # Naming bash explicitly makes the runner start it with
        # "-eo pipefail", so a failing command inside a pipeline aborts the
        # step instead of being masked by the last command's status.
        shell: bash

    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      # Outputs: cluster_name, ecs_service_name, container_name.
      - name: Set variables
        id: vars
        run: |
          # ENVIRONMENT and SERVICE_NAME come from the workflow-level env.
          CLUSTER_NAME="${ENVIRONMENT}-${SERVICE_NAME}-cluster"
          ECS_SERVICE_NAME="${ENVIRONMENT}-${SERVICE_NAME}"
          CONTAINER_NAME="${SERVICE_NAME}-container"

          {
            echo "cluster_name=${CLUSTER_NAME}"
            echo "ecs_service_name=${ECS_SERVICE_NAME}"
            echo "container_name=${CONTAINER_NAME}"
          } >> "$GITHUB_OUTPUT"

          echo "Cluster: ${CLUSTER_NAME}"
          echo "Service: ${ECS_SERVICE_NAME}"
          echo "Container: ${CONTAINER_NAME}"

      # Outputs: task_def_arn, mount_path, subnet, security_group.
      - name: Get task definition and network config
        id: get-config
        env:
          # Step outputs are passed through env instead of being expanded
          # into the script text, so their content is never parsed as shell.
          CLUSTER_NAME: ${{ steps.vars.outputs.cluster_name }}
          ECS_SERVICE_NAME: ${{ steps.vars.outputs.ecs_service_name }}
        run: |
          # Fetch the service once and read every field from that snapshot.
          SERVICE_JSON=$(aws ecs describe-services \
            --cluster "${CLUSTER_NAME}" \
            --services "${ECS_SERVICE_NAME}" \
            --query 'services[0]' \
            --output json)

          TASK_DEF_ARN=$(jq -r '.taskDefinition // empty' <<< "${SERVICE_JSON}")
          if [ -z "${TASK_DEF_ARN}" ]; then
            echo "::error::Service ${ECS_SERVICE_NAME} not found in cluster ${CLUSTER_NAME}"
            exit 1
          fi
          echo "Task Definition: ${TASK_DEF_ARN}"

          # Get mount path from task definition
          MOUNT_PATH=$(aws ecs describe-task-definition \
            --task-definition "${TASK_DEF_ARN}" \
            --query 'taskDefinition.containerDefinitions[0].mountPoints[?sourceVolume==`persistent-volume`].containerPath' \
            --output text)
          # "--output text" prints the literal string "None" for a null
          # result; treat it like an empty result and use the default path.
          if [ -z "${MOUNT_PATH}" ] || [ "${MOUNT_PATH}" = "None" ]; then
            MOUNT_PATH="/persistent"
          fi
          echo "Mount Path: ${MOUNT_PATH}"

          # Use first subnet and security group of the service
          SUBNET=$(jq -r \
            '.networkConfiguration.awsvpcConfiguration.subnets[0] // empty' \
            <<< "${SERVICE_JSON}")
          SECURITY_GROUP=$(jq -r \
            '.networkConfiguration.awsvpcConfiguration.securityGroups[0] // empty' \
            <<< "${SERVICE_JSON}")
          if [ -z "${SUBNET}" ] || [ -z "${SECURITY_GROUP}" ]; then
            echo "::error::Service ${ECS_SERVICE_NAME} has no awsvpc subnet/security group configured"
            exit 1
          fi

          {
            echo "task_def_arn=${TASK_DEF_ARN}"
            echo "mount_path=${MOUNT_PATH}"
            echo "subnet=${SUBNET}"
            echo "security_group=${SECURITY_GROUP}"
          } >> "$GITHUB_OUTPUT"

          echo "Subnet: ${SUBNET}"
          echo "Security Group: ${SECURITY_GROUP}"

      # Outputs: efs_fs_id, execution_role_arn, task_role_arn.
      - name: Get EFS and role details
        id: get-efs
        env:
          TASK_DEF_ARN: ${{ steps.get-config.outputs.task_def_arn }}
        run: |
          # Get EFS file system ID and roles from task definition
          TASK_DEF_JSON=$(aws ecs describe-task-definition \
            --task-definition "${TASK_DEF_ARN}")

          # "// empty" keeps jq from printing the string "null" for a
          # missing field.
          EFS_FS_ID=$(jq -r \
            '.taskDefinition.volumes[]? | select(.name=="persistent-volume") | .efsVolumeConfiguration.fileSystemId // empty' \
            <<< "${TASK_DEF_JSON}")
          EXEC_ROLE_ARN=$(jq -r '.taskDefinition.executionRoleArn // empty' \
            <<< "${TASK_DEF_JSON}")
          TASK_ROLE_ARN=$(jq -r '.taskDefinition.taskRoleArn // empty' \
            <<< "${TASK_DEF_JSON}")

          if [ -z "${EFS_FS_ID}" ]; then
            echo "::error::No EFS-backed volume named persistent-volume in ${TASK_DEF_ARN}"
            exit 1
          fi

          {
            echo "efs_fs_id=${EFS_FS_ID}"
            echo "execution_role_arn=${EXEC_ROLE_ARN}"
            # Fall back to the execution role when no task role is set.
            echo "task_role_arn=${TASK_ROLE_ARN:-${EXEC_ROLE_ARN}}"
          } >> "$GITHUB_OUTPUT"

          echo "EFS File System ID: ${EFS_FS_ID}"

      # Outputs: task_arn, exit_code, stop_reason.
      - name: Run cleanup task
        id: cleanup-task
        env:
          CLUSTER_NAME: ${{ steps.vars.outputs.cluster_name }}
          MOUNT_PATH: ${{ steps.get-config.outputs.mount_path }}
          SUBNET: ${{ steps.get-config.outputs.subnet }}
          SECURITY_GROUP: ${{ steps.get-config.outputs.security_group }}
          EFS_FS_ID: ${{ steps.get-efs.outputs.efs_fs_id }}
          EXECUTION_ROLE_ARN: ${{ steps.get-efs.outputs.execution_role_arn }}
          TASK_ROLE_ARN: ${{ steps.get-efs.outputs.task_role_arn }}
        run: |
          echo "Running cleanup task to remove all files from ECS volume at ${MOUNT_PATH}..."

          # Prepare network configuration
          NETWORK_CONFIG=$(jq -n \
            --arg subnet "${SUBNET}" \
            --arg sg "${SECURITY_GROUP}" \
            '{
              awsvpcConfiguration: {
                subnets: [$subnet],
                securityGroups: [$sg],
                assignPublicIp: "DISABLED"
              }
            }')

          # Script executed inside the cleanup container. It is single-quoted
          # so nothing expands on the runner; MOUNT_PATH is supplied through
          # the container environment. Deletion is best-effort, but the
          # script then verifies the directory is empty and exits non-zero
          # otherwise, so a failed wipe is not reported as a success.
          CLEANUP_CMD='set -e
          echo "Starting cleanup..."
          rm -rf "$MOUNT_PATH"/* "$MOUNT_PATH"/.[!.]* "$MOUNT_PATH"/..?* 2>/dev/null || true
          find "$MOUNT_PATH" -mindepth 1 -delete 2>/dev/null || true
          echo "Cleanup completed"
          ls -la "$MOUNT_PATH"
          if [ -n "$(ls -A "$MOUNT_PATH")" ]; then
            echo "Volume is NOT empty" >&2
            exit 1
          fi
          echo "Volume is now empty"
          sleep 10'

          # Build cleanup task definition using Alpine (no entrypoint issues).
          # with_entries drops role keys whose value is an empty string so
          # that an unset role is omitted rather than sent as "".
          CLEANUP_TASK_DEF=$(jq -n \
            --arg exec_role "${EXECUTION_ROLE_ARN}" \
            --arg task_role "${TASK_ROLE_ARN}" \
            --arg efs_id "${EFS_FS_ID}" \
            --arg mount_path "${MOUNT_PATH}" \
            --arg cmd "${CLEANUP_CMD}" \
            '{
              family: "cleanup-ecs-volume",
              networkMode: "awsvpc",
              requiresCompatibilities: ["FARGATE"],
              cpu: "256",
              memory: "512",
              executionRoleArn: $exec_role,
              taskRoleArn: $task_role,
              containerDefinitions: [{
                name: "cleanup-container",
                image: "alpine:latest",
                essential: true,
                command: ["/bin/sh", "-c", $cmd],
                environment: [{name: "MOUNT_PATH", value: $mount_path}],
                mountPoints: [{
                  sourceVolume: "persistent-volume",
                  containerPath: $mount_path,
                  readOnly: false
                }]
              }],
              volumes: [{
                name: "persistent-volume",
                efsVolumeConfiguration: {
                  fileSystemId: $efs_id,
                  rootDirectory: "/",
                  transitEncryption: "ENABLED"
                }
              }]
            } | with_entries(select(.value != ""))')

          # Register cleanup task definition
          CLEANUP_TASK_DEF_ARN=$(echo "${CLEANUP_TASK_DEF}" | \
            aws ecs register-task-definition \
              --cli-input-json file:///dev/stdin \
              --query 'taskDefinition.taskDefinitionArn' \
              --output text)

          echo "Registered cleanup task definition: ${CLEANUP_TASK_DEF_ARN}"

          # Every run registers a new revision of the cleanup-ecs-volume
          # family. Deregister it when the step exits (success or failure).
          # Best-effort: a failure here only produces a warning.
          deregister_cleanup_task_def() {
            aws ecs deregister-task-definition \
              --task-definition "${CLEANUP_TASK_DEF_ARN}" > /dev/null 2>&1 \
              || echo "::warning::Could not deregister ${CLEANUP_TASK_DEF_ARN}"
          }
          trap deregister_cleanup_task_def EXIT

          # Run cleanup task
          TASK_ARN=$(aws ecs run-task \
            --cluster "${CLUSTER_NAME}" \
            --task-definition "${CLEANUP_TASK_DEF_ARN}" \
            --launch-type FARGATE \
            --network-configuration "${NETWORK_CONFIG}" \
            --query 'tasks[0].taskArn' \
            --output text)

          if [ -z "${TASK_ARN}" ] || [ "${TASK_ARN}" = "None" ]; then
            echo "Error: Failed to start cleanup task"
            exit 1
          fi

          echo "task_arn=${TASK_ARN}" >> "$GITHUB_OUTPUT"
          echo "Started cleanup task: ${TASK_ARN}"

          # Wait for the task to complete
          echo "Waiting for cleanup task to complete..."
          aws ecs wait tasks-stopped \
            --cluster "${CLUSTER_NAME}" \
            --tasks "${TASK_ARN}"

          # Read exit code and stop reason from a single describe-tasks call.
          TASK_JSON=$(aws ecs describe-tasks \
            --cluster "${CLUSTER_NAME}" \
            --tasks "${TASK_ARN}" \
            --query 'tasks[0]' \
            --output json)

          # A container that never started has no exitCode; report "None".
          EXIT_CODE=$(jq -r '.containers[0].exitCode // "None"' \
            <<< "${TASK_JSON}")
          # Flatten line breaks so the value fits on one key=value line of
          # GITHUB_OUTPUT.
          STOP_REASON=$(jq -r \
            '(.stoppedReason // "None") | gsub("[\r\n]+"; " ")' \
            <<< "${TASK_JSON}")

          {
            echo "exit_code=${EXIT_CODE}"
            echo "stop_reason=${STOP_REASON}"
          } >> "$GITHUB_OUTPUT"

          echo "Task stopped with exit code: ${EXIT_CODE}"
          echo "Stop reason: ${STOP_REASON}"

          # Only an explicit exit code of 0 counts as success. "None" means
          # the container never ran, so nothing was cleaned.
          if [ "${EXIT_CODE}" != "0" ]; then
            echo "::error::Cleanup task did not succeed (exit code: ${EXIT_CODE}, reason: ${STOP_REASON})"
            exit 1
          fi

          echo "✅ Cleanup completed successfully"

      # Runs even when an earlier step failed; missing outputs render empty.
      - name: Cleanup summary
        if: always()
        env:
          CLUSTER_NAME: ${{ steps.vars.outputs.cluster_name }}
          ECS_SERVICE_NAME: ${{ steps.vars.outputs.ecs_service_name }}
          MOUNT_PATH: ${{ steps.get-config.outputs.mount_path }}
          TASK_ARN: ${{ steps.cleanup-task.outputs.task_arn }}
          EXIT_CODE: ${{ steps.cleanup-task.outputs.exit_code }}
          STOP_REASON: ${{ steps.cleanup-task.outputs.stop_reason }}
        run: |
          {
            echo "## Cleanup Summary"
            echo ""
            echo "**Environment Details:**"
            echo "- Cluster: \`${CLUSTER_NAME}\`"
            echo "- Service: \`${ECS_SERVICE_NAME}\`"
            echo "- Mount Path: \`${MOUNT_PATH}\`"
            echo ""
            echo "**Task Details:**"
            echo "- Task ARN: \`${TASK_ARN}\`"
            echo "- Exit Code: \`${EXIT_CODE}\`"
            echo "- Stop Reason: \`${STOP_REASON}\`"
            echo ""

            if [ "${EXIT_CODE}" = "0" ]; then
              echo "✅ **Status:** ECS volume cleaned successfully"
            else
              echo "⚠️ **Status:** Cleanup completed with errors"
            fi
          } >> "$GITHUB_STEP_SUMMARY"