Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .cfignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@
/public/packs-test
/node_modules

# Ignore Rust build artifacts, but keep the prebuilt widget library
# Ignore Rust build artifacts
target/
ext/widget_renderer/target/
!ext/widget_renderer/target/
!ext/widget_renderer/target/release/
!ext/widget_renderer/target/release/libwidget_renderer.so
!ext/widget_renderer/libwidget_renderer.so
49 changes: 39 additions & 10 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,42 @@ jobs:
rustc --version
cargo --version

- restore_cache:
keys:
- v2-cargo-{{ checksum "ext/widget_renderer/Cargo.lock" }}
- v2-cargo-

- run:
name: Build widget renderer (Rust)
command: |
source $HOME/.cargo/env
cargo build --release --manifest-path ext/widget_renderer/Cargo.toml

- run:
    name: Verify Rust native library linkage
    command: |
      set -euo pipefail
      LIB=ext/widget_renderer/target/release/libwidget_renderer.so
      if [ -f "$LIB" ]; then
        echo "Found built rust library; verifying linkage..."
        # Capture the ldd report once instead of piping it into `grep -q`.
        # With pipefail enabled, grep -q exiting on its first match can kill
        # ldd with SIGPIPE, which makes the whole pipeline "fail" and would
        # send us down the "looks good" branch despite unresolved libraries.
        # `|| true` keeps the check best-effort when ldd itself exits non-zero.
        ldd_output="$(ldd "$LIB" 2>&1)" || true
        case "$ldd_output" in
          *"not found"*)
            echo "ERROR: Rust library has unresolved dependencies (ldd shows 'not found')."
            printf '%s\n' "$ldd_output"
            exit 1
            ;;
          *)
            echo "Rust library linkage looks good"
            ;;
        esac
      else
        echo "No Rust library built - skipping linkage verification"
      fi

- save_cache:
paths:
- ext/widget_renderer/target
- ~/.cargo/registry
- ~/.cargo/git
key: v2-cargo-{{ checksum "ext/widget_renderer/Cargo.lock" }}

# Download and cache dependencies
- restore_cache:
keys:
Expand Down Expand Up @@ -117,12 +147,11 @@ jobs:
echo "Skipping Sidekiq deploy on parallel node ${CIRCLE_NODE_INDEX}"
exit 0
fi
# Remove prebuilt Rust library - it must be built on CF with the correct Ruby paths
# The library built on CircleCI links against /usr/local/lib/libruby.so.3.2
# but on CF, Ruby is in /home/vcap/deps/*/ruby/lib/
echo "Removing prebuilt Rust library (will be rebuilt on CF)..."
rm -rf ext/widget_renderer/target/release/libwidget_renderer.so 2>/dev/null || true
rm -f ext/widget_renderer/libwidget_renderer.so 2>/dev/null || true
# Keep prebuilt Rust library - extconf.rb builds it during bundle install with correct paths
# The library is built with rutie which properly links against the CF Ruby installation
# echo "Removing prebuilt Rust library (will be rebuilt on CF)..."
# rm -rf ext/widget_renderer/target/release/libwidget_renderer.so 2>/dev/null || true
# rm -f ext/widget_renderer/libwidget_renderer.so 2>/dev/null || true
./.circleci/deploy-sidekiq.sh
no_output_timeout: 30m

Expand All @@ -136,10 +165,10 @@ jobs:
fi
# Wait for Sidekiq deployment to complete before starting web deploy
sleep 120
# Remove prebuilt Rust library - it must be built on CF with the correct Ruby paths
echo "Removing prebuilt Rust library (will be rebuilt on CF)..."
rm -rf ext/widget_renderer/target/release/libwidget_renderer.so 2>/dev/null || true
rm -f ext/widget_renderer/libwidget_renderer.so 2>/dev/null || true
# Keep prebuilt Rust library - extconf.rb builds it during bundle install with correct paths
# echo "Removing prebuilt Rust library (will be rebuilt on CF)..."
# rm -rf ext/widget_renderer/target/release/libwidget_renderer.so 2>/dev/null || true
# rm -f ext/widget_renderer/libwidget_renderer.so 2>/dev/null || true
./.circleci/deploy.sh
no_output_timeout: 30m

Expand Down
43 changes: 42 additions & 1 deletion .circleci/deploy-sidekiq.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,43 @@ cf_push_with_retry() {
# Wait for any in-progress deployment
wait_for_deployment "$app_name"

# Update app to use 180s invocation timeout and process health check before rolling deploy
echo "Updating health check configuration for $app_name..."
cf set-health-check "$app_name" process --invocation-timeout 180 || true
sleep 2

# Get current instance count and scale down to 1 to avoid memory quota issues during rolling deploy
echo "Checking current instance count for $app_name..."
local current_instances=$(cf app "$app_name" | grep "^instances:" | awk '{print $2}' | cut -d'/' -f2 || echo "1")
echo "Current instances: $current_instances"

if [ "$current_instances" -gt 1 ]; then
echo "Scaling down to 1 instance to free memory for rolling deploy..."
cf scale "$app_name" -i 1 || true
sleep 5
fi

for i in $(seq 1 $max_retries); do
echo "Attempt $i of $max_retries to push $app_name..."
if cf push "$app_name" --strategy rolling; then

# Stop the app first to free memory for staging
echo "Stopping $app_name to free memory for staging..."
cf stop "$app_name" || true
sleep 5

# Push without rolling strategy (direct replacement since we stopped it)
# Let CF auto-detect buildpacks to avoid re-running supply phase (Rust already built in CircleCI)
if cf push "$app_name" \
-t 180 \
Comment on lines +124 to +132
Copy link

Copilot AI Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The deploy script stops the Sidekiq app (line 126) before pushing, which causes downtime for background job processing. This means jobs will not be processed during the deployment window. For a worker process like Sidekiq, a better approach would be to allow the old instance to finish current jobs gracefully before replacing it, or ensure the deployment window is communicated to users.

Suggested change
# Stop the app first to free memory for staging
echo "Stopping $app_name to free memory for staging..."
cf stop "$app_name" || true
sleep 5
# Push without rolling strategy (direct replacement since we stopped it)
# Let CF auto-detect buildpacks to avoid re-running supply phase (Rust already built in CircleCI)
if cf push "$app_name" \
-t 180 \
# Do not stop the app to avoid downtime for Sidekiq background jobs.
# Keep a single instance running and rely on a rolling deploy to replace it gracefully.
# Push with rolling strategy (zero-downtime replacement of the running worker instance)
# Let CF auto-detect buildpacks to avoid re-running supply phase (Rust already built in CircleCI)
if cf push "$app_name" \
-t 180 \
--strategy rolling \

Copilot uses AI. Check for mistakes.
--health-check-type process; then
echo "Successfully pushed $app_name"

# Scale back up to original instance count
if [ "$current_instances" -gt 1 ]; then
echo "Scaling up to $current_instances instances..."
cf scale "$app_name" -i "$current_instances" || true
fi

release_deploy_lock "$app_name"
trap - EXIT # Clear the trap
return 0
Expand All @@ -120,6 +153,12 @@ cf_push_with_retry() {
fi
done

# If we failed, try to scale back up anyway
if [ "$current_instances" -gt 1 ]; then
echo "Deploy failed, attempting to scale back up to $current_instances instances..."
cf scale "$app_name" -i "$current_instances" || true
fi

release_deploy_lock "$app_name"
trap - EXIT # Clear the trap
echo "Failed to push $app_name after $max_retries attempts"
Expand All @@ -132,6 +171,8 @@ then
# Log into CF and push
cf login -a $CF_API_ENDPOINT -u $CF_PRODUCTION_SPACE_DEPLOYER_USERNAME -p $CF_PRODUCTION_SPACE_DEPLOYER_PASSWORD -o $CF_ORG -s prod
echo "PUSHING to PRODUCTION..."
echo "Syncing Login.gov environment variables..."
./.circleci/sync-login-gov-env.sh touchpoints-production-sidekiq-worker
cf_push_with_retry touchpoints-production-sidekiq-worker
echo "Push to Production Complete."
else
Expand Down
114 changes: 101 additions & 13 deletions .circleci/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ release_deploy_lock() {
# Wait for any in-progress deployments to complete before starting
wait_for_deployment() {
local app_name="$1"
local max_wait=600 # 10 minutes max
local max_wait=800 # 13 minutes and 20 seconds max
local wait_interval=15
local waited=0

Expand All @@ -87,12 +87,87 @@ wait_for_deployment() {
return 0
}

# Run database migrations as a one-off CF task and wait for completion.
#
# Arguments:
#   $1 - name of the CF app the task is started on
# Outputs:
#   Progress messages, the raw `cf run-task` output and, on failure or
#   timeout, recent log lines mentioning the task name (all on stdout).
# Returns:
#   0 when the task reaches SUCCEEDED.
#   1 when `cf run-task` fails, no task ID can be determined, the task
#     reaches FAILED, or it does not finish within max_wait seconds.
run_migrations() {
  local app_name="$1"
  local max_wait=1800 # 30 minutes max for migrations
  local wait_interval=10
  local waited=0
  local task_name="pre-deploy-migrations"
  local task_output=""
  local task_id=""
  local task_state=""
  local run_status=0

  echo "Running database migrations for $app_name..."

  # Start migration task. Declaration and assignment are separate so that the
  # exit status of `cf run-task` is not masked by `local`.
  task_output=$(cf run-task "$app_name" --command "bundle exec rails db:migrate" --name "$task_name" 2>&1) || run_status=$?
  echo "$task_output"

  # If the task could not be submitted, stop here. Falling through to the
  # tasks-list lookup below could pick up an unrelated task with the same name
  # and report its result as ours.
  if [ "$run_status" -ne 0 ]; then
    echo "Error: Failed to start migration task (cf run-task exit code: $run_status)"
    return 1
  fi

  # Extract task ID from output
  task_id=$(printf '%s\n' "$task_output" | grep -oE 'task id:[[:space:]]+[0-9]+' | grep -oE '[0-9]+' || echo "")

  if [ -z "$task_id" ]; then
    echo "Warning: Could not determine task ID, checking tasks list..."
    sleep 5
    # First RUNNING row whose name column is exactly our task name. The match
    # is printed from END so awk reads all of its input (no early pipe close).
    task_id=$(cf tasks "$app_name" \
      | awk -v name="$task_name" \
        '!found && $2 == name && $3 == "RUNNING" { id = $1; found = 1 } END { if (found) print id }') || task_id=""
  fi

  if [ -z "$task_id" ]; then
    echo "Error: Failed to start migration task"
    return 1
  fi

  echo "Migration task started with ID: $task_id"
  echo "Waiting for migrations to complete..."

  # Wait for task to complete
  while [ "$waited" -lt "$max_wait" ]; do
    # Compare the id column as a field rather than with a "^id " regex, so
    # leading whitespace or different column padding does not hide the row.
    task_state=$(cf tasks "$app_name" | awk -v id="$task_id" '$1 == id { print $3 }') || task_state=""

    if [ "$task_state" == "SUCCEEDED" ]; then
      echo "✓ Migrations completed successfully"
      return 0
    elif [ "$task_state" == "FAILED" ]; then
      echo "✗ Migration task failed. Checking logs..."
      cf logs "$app_name" --recent | grep "$task_name" | tail -50
      return 1
    fi

    # Only report progress every 30 seconds to keep the CI log readable.
    if [ $((waited % 30)) -eq 0 ]; then
      echo "Migration task still running (state: $task_state, waited ${waited}s)..."
    fi

    sleep "$wait_interval"
    waited=$((waited + wait_interval))
  done

  echo "Error: Migration task did not complete within ${max_wait}s"
  cf logs "$app_name" --recent | grep "$task_name" | tail -50
  return 1
}

# Retry function to handle staging and deployment conflicts
cf_push_with_retry() {
local app_name="$1"
local manifest_path="${2:-}"
local run_migrations="${3:-false}"
local max_retries=5
local retry_delay=90

# Run migrations first if requested
if [ "$run_migrations" == "true" ]; then
if ! run_migrations "$app_name"; then
echo "Error: Migrations failed, aborting deployment"
return 1
fi
fi

# Ensure CircleCI-built Rust library is present
if [ -f "ext/widget_renderer/target/release/libwidget_renderer.so" ]; then
echo "CircleCI-built Rust library found, will be included in deployment"
file ext/widget_renderer/target/release/libwidget_renderer.so
readelf -n ext/widget_renderer/target/release/libwidget_renderer.so | grep "Build ID" || true
else
echo "WARNING: No CircleCI-built Rust library found at ext/widget_renderer/target/release/libwidget_renderer.so"
fi

# Acquire lock first
acquire_deploy_lock "$app_name"

Expand All @@ -104,19 +179,30 @@ cf_push_with_retry() {

for i in $(seq 1 $max_retries); do
echo "Attempt $i of $max_retries to push $app_name..."
if cf push "$app_name" --strategy rolling; then
local exit_code=0

set +e
if [ -n "$manifest_path" ]; then
echo "Using manifest: $manifest_path"
cf push "$app_name" -f "$manifest_path" --strategy rolling -t 180
else
cf push "$app_name" --strategy rolling -t 180
fi
exit_code=$?
set -e

if [ $exit_code -eq 0 ]; then
echo "Successfully pushed $app_name"
release_deploy_lock "$app_name"
trap - EXIT # Clear the trap
return 0
else
local exit_code=$?
if [ $i -lt $max_retries ]; then
echo "Push failed (exit code: $exit_code), waiting ${retry_delay}s before retry..."
sleep $retry_delay
# Re-check for in-progress deployments before retrying
wait_for_deployment "$app_name"
fi
fi

if [ $i -lt $max_retries ]; then
echo "Push failed (exit code: $exit_code), waiting ${retry_delay}s before retry..."
sleep $retry_delay
# Re-check for in-progress deployments before retrying
wait_for_deployment "$app_name"
fi
done

Expand All @@ -132,7 +218,9 @@ then
# Log into CF and push
cf login -a $CF_API_ENDPOINT -u $CF_PRODUCTION_SPACE_DEPLOYER_USERNAME -p $CF_PRODUCTION_SPACE_DEPLOYER_PASSWORD -o $CF_ORG -s prod
echo "PUSHING web servers to Production..."
cf_push_with_retry touchpoints
echo "Syncing Login.gov environment variables..."
./.circleci/sync-login-gov-env.sh touchpoints
cf_push_with_retry touchpoints touchpoints.yml false
Copy link

Copilot AI Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The production deployment (line 223) passes 'false' for the 'run_migrations' parameter, while demo and staging deployments (lines 235, 247) pass 'true'. This means migrations are not automatically run for production deployments, which could lead to schema mismatches after deployment. Production should either run migrations or have a clear documented process for running them separately.

Suggested change
cf_push_with_retry touchpoints touchpoints.yml false
cf_push_with_retry touchpoints touchpoints.yml true

Copilot uses AI. Check for mistakes.
echo "Push to Production Complete."
else
echo "Not on the production branch."
Expand All @@ -144,7 +232,7 @@ then
# Log into CF and push
cf login -a $CF_API_ENDPOINT -u $CF_USERNAME -p $CF_PASSWORD -o $CF_ORG -s $CF_SPACE
echo "Pushing web servers to Demo..."
cf_push_with_retry touchpoints-demo
cf_push_with_retry touchpoints-demo "" true
echo "Push to Demo Complete."
else
echo "Not on the main branch."
Expand All @@ -156,7 +244,7 @@ then
# Log into CF and push
cf login -a $CF_API_ENDPOINT -u $CF_USERNAME -p $CF_PASSWORD -o $CF_ORG -s $CF_SPACE
echo "Pushing web servers to Staging..."
cf_push_with_retry touchpoints-staging
cf_push_with_retry touchpoints-staging "" true
echo "Push to Staging Complete."
else
echo "Not on the develop branch."
Expand Down
42 changes: 42 additions & 0 deletions .circleci/sync-login-gov-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

set -euo pipefail

# Abort the script unless the named environment variable is set and non-empty.
#
# Arguments:
#   $1 - NAME of the variable to check (looked up indirectly)
# Outputs:
#   An error message on stderr when the variable is missing.
# Exits with status 1 when the variable is unset or empty.
require_env() {
  local wanted="$1"

  # Guard clause: nothing to do when the variable has a value.
  [ -n "${!wanted:-}" ] && return 0

  echo "Missing required env var: ${wanted}" >&2
  exit 1
}

# Filter: read text on stdin and write it to stdout as a single line.
# The Ruby one-liner first turns CRLF into LF, then replaces every newline
# with the two-character sequence backslash + "n".
escape_private_key() {
  local ruby_filter='print STDIN.read.gsub("\r\n", "\n").gsub("\n", "\\n")'

  ruby -e "$ruby_filter"
}

# Copy the Login.gov settings from the current environment onto a CF app.
#
# Arguments:
#   $1 - name of the CF app to update
# Environment (each checked with require_env):
#   LOGIN_GOV_CLIENT_ID, LOGIN_GOV_IDP_BASE_URL, LOGIN_GOV_REDIRECT_URI,
#   LOGIN_GOV_PRIVATE_KEY
# Outputs:
#   A single confirmation line on stdout; `cf set-env` output is discarded.
sync_login_gov_env() {
  local target_app="$1"
  local setting
  local escaped_key

  # Validate everything up front so no partial update happens on a missing var.
  for setting in LOGIN_GOV_CLIENT_ID LOGIN_GOV_IDP_BASE_URL LOGIN_GOV_REDIRECT_URI LOGIN_GOV_PRIVATE_KEY; do
    require_env "$setting"
  done

  # The private key is passed through escape_private_key before being stored.
  escaped_key="$(printf "%s" "${LOGIN_GOV_PRIVATE_KEY}" | escape_private_key)"

  # Plain values are stored verbatim, in the same order as they are validated.
  for setting in LOGIN_GOV_CLIENT_ID LOGIN_GOV_IDP_BASE_URL LOGIN_GOV_REDIRECT_URI; do
    cf set-env "$target_app" "$setting" "${!setting}" >/dev/null
  done
  cf set-env "$target_app" LOGIN_GOV_PRIVATE_KEY "$escaped_key" >/dev/null

  echo "Synced Login.gov env to ${target_app}"
}

# Entry point: the first argument (the CF app name) is mandatory and must be
# non-empty; otherwise print usage to stderr and exit with status 2.
[ -n "${1:-}" ] || {
  echo "Usage: $0 <app-name>" >&2
  exit 2
}

sync_login_gov_env "$1"

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,6 @@ ext/widget_renderer/Makefile
ext/widget_renderer/*.dylib
# Keep the prebuilt Linux .so for Cloud Foundry deployment
!ext/widget_renderer/libwidget_renderer.so

# Certificate files (avoid accidental commits of sensitive keys/certs)
*.pem
Loading
Loading