Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blue green cutover and improvements in logging #7

Merged
merged 7 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM dnxsolutions/aws:1.16.263-dnx2
FROM dnxsolutions/aws:1.17.14-dnx3

RUN apk --no-cache update && \
apk --no-cache add python3 && \
Expand All @@ -8,7 +8,7 @@ RUN pip3 install --no-cache --upgrade boto3

ADD src .

RUN chmod +x deploy.sh task-deploy.sh run-task.sh cutover.sh tail-task-logs.py
RUN chmod +x *.sh *.py

ENTRYPOINT [ "/bin/bash", "-c" ]

Expand Down
18 changes: 18 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Local development environment for the deploy image.
# Builds the image from the Dockerfile in this directory and opens a shell in it.
version: '3.4'

services:
  app:
    build: .
    volumes:
      # Mount the working copy into the container.
      - .:/work
    environment:
      # Variables listed without a value are passed through from the host shell.
      - AWS_ACCESS_KEY_ID
      - AWS_ACCOUNT_ID
      - AWS_DEFAULT_REGION
      - AWS_ROLE
      - AWS_SECRET_ACCESS_KEY
      - AWS_SECURITY_TOKEN
      - AWS_SESSION_EXPIRATION
      - AWS_SESSION_TOKEN
    # Clear the image ENTRYPOINT so that `command` is executed directly.
    entrypoint: ""
    command: /bin/bash
21 changes: 5 additions & 16 deletions src/app-spec.tpl.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
{
"version": 1,
"Resources": [
{
"TargetService": {
"Type": "AWS::ECS::Service",
"Properties": {
"TaskDefinition": "$TASK_ARN",
"LoadBalancerInfo": {
"ContainerName": "$APP_NAME",
"ContainerPort": $CONTAINER_PORT
}
}
}
}
]
}
"revisionType": "AppSpecContent",
"appSpecContent": {
"content": "{\"version\":1,\"Resources\":[{\"TargetService\":{\"Type\":\"AWS::ECS::Service\",\"Properties\":{\"TaskDefinition\":\"$TASK_ARN\",\"LoadBalancerInfo\":{\"ContainerName\":\"$APP_NAME\",\"ContainerPort\":$CONTAINER_PORT}}}}]}"
}
}
15 changes: 2 additions & 13 deletions src/cutover.sh → src/deploy-cutover.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,20 @@
#!/bin/bash -e

ERROR=0
if [[ -z "$AWS_DEFAULT_REGION" ]]; then echo "---> ERROR: Missing variable AWS_DEFAULT_REGION"; ERROR=1; fi
if [[ -z "$APP_NAME" ]]; then echo "---> ERROR: Missing variable APP_NAME"; ERROR=1; fi
if [[ -z "$CLUSTER_NAME" ]]; then echo "---> ERROR: Missing variable CLUSTER_NAME"; ERROR=1; fi
if [[ -z "$CONTAINER_PORT" ]]; then echo "---> ERROR: Missing variable CONTAINER_PORT"; ERROR=1; fi
if [[ -z "$IMAGE_NAME" ]]; then echo "---> ERROR: Missing variable IMAGE_NAME"; ERROR=1; fi
if [[ "$ERROR" == "1" ]]; then exit 1; fi

# Fetch the ID of the deployment pending cutover to the green (new) environment
DEPLOYMENT_ID=$(aws deploy list-deployments --application-name=$CLUSTER_NAME-$APP_NAME --deployment-group=$CLUSTER_NAME-$APP_NAME --max-items=1 --query="deployments[0]" --output=text | head -n 1)

DEPLOYMENT_PID=$!

#echo "---> For More Deployment info: https://$AWS_DEFAULT_REGION.console.aws.amazon.com/codesuite/codedeploy/deployments/$DEPLOYMENT_ID"

#echo "---> Waiting for Deployment ..."

aws deploy continue-deployment --deployment-id $DEPLOYMENT_ID --deployment-wait-type "READY_WAIT"

wait $DEPLOYMENT_PID
RET=$?

if [ $RET -eq 0 ]; then
echo "---> Deployment completed!"
echo "---> Cutover engaged!"
else
echo "---> ERROR: Deployment FAILED!"
echo "---> ERROR: Cutover FAILED!"
fi

exit $RET
20 changes: 20 additions & 0 deletions src/deploy-stop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash -e
#
# Stops the most recent CodeDeploy deployment of the application and
# deployment group named "$CLUSTER_NAME-$APP_NAME".
#
# Required environment variables:
#   AWS_DEFAULT_REGION, APP_NAME, CLUSTER_NAME
#
# Exit status:
#   1 when a required variable is missing or no deployment ID can be found,
#   otherwise the exit status of `aws deploy stop-deployment`.

# Report every missing variable before giving up, so the caller can fix them all at once.
ERROR=0
if [[ -z "$AWS_DEFAULT_REGION" ]]; then echo "---> ERROR: Missing variable AWS_DEFAULT_REGION"; ERROR=1; fi
if [[ -z "$APP_NAME" ]]; then echo "---> ERROR: Missing variable APP_NAME"; ERROR=1; fi
if [[ -z "$CLUSTER_NAME" ]]; then echo "---> ERROR: Missing variable CLUSTER_NAME"; ERROR=1; fi
if [[ "$ERROR" == "1" ]]; then exit 1; fi

# Fetch the ID of the most recent deployment (the one pending cutover to the green/new environment).
# `head -n 1` keeps only the first line of the text output.
DEPLOYMENT_ID=$(aws deploy list-deployments \
  --application-name "$CLUSTER_NAME-$APP_NAME" \
  --deployment-group-name "$CLUSTER_NAME-$APP_NAME" \
  --max-items 1 \
  --query "deployments[0]" \
  --output text | head -n 1)

# The pipeline above does not abort the script when `aws` fails (no pipefail),
# so an empty or "None" result has to be detected explicitly.
if [[ -z "$DEPLOYMENT_ID" || "$DEPLOYMENT_ID" == "None" ]]; then
  echo "---> ERROR: No deployment found for $CLUSTER_NAME-$APP_NAME"
  exit 1
fi

# Capture the status with `||` so that `-e` does not terminate the script
# before the failure message below can be printed.
RET=0
aws deploy stop-deployment --deployment-id "$DEPLOYMENT_ID" || RET=$?

if [ "$RET" -eq 0 ]; then
  echo "---> Deployment stopped!"
else
  echo "---> ERROR: Deployment stopped FAILED!"
fi

exit "$RET"
110 changes: 89 additions & 21 deletions src/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,47 +17,115 @@ envsubst < task-definition.tpl.json > task-definition.json
echo "---> Task Definition"
cat task-definition.json

export TASK_ARN=TASK_ARN_PLACEHOLDER
export TASK_ARN=$(aws ecs register-task-definition --cli-input-json file://./task-definition.json | jq --raw-output '.taskDefinition.taskDefinitionArn')

envsubst < app-spec.tpl.json > app-spec.json
echo
echo "---> App-spec for CodeDeploy"
cat app-spec.json

echo
echo "---> Creating deployment with CodeDeploy"

set +e # disable bash exit on error

# Update the ECS service to use the updated Task version
aws ecs deploy \
--service $APP_NAME \
--task-definition ./task-definition.json \
--cluster $CLUSTER_NAME \
--codedeploy-appspec ./app-spec.json \
--codedeploy-application $CLUSTER_NAME-$APP_NAME \
--codedeploy-deployment-group $CLUSTER_NAME-$APP_NAME &
# # Update the ECS service to use the updated Task version
DEPLOYMENT_ID=$(aws deploy create-deployment \
--application-name $CLUSTER_NAME-$APP_NAME \
--deployment-config-name CodeDeployDefault.ECSAllAtOnce \
--deployment-group-name $CLUSTER_NAME-$APP_NAME \
--description Deployment \
--revision file://app-spec.json \
--query="deploymentId" --output text)

# In case there is already a deployment in progress, script will fail
if [ $? -eq 255 ]; then
echo
echo
echo "===> Deployment already in progress. Please approve current deployment before performing a new deployment"
echo
echo
exit 1
fi

sleep 5 # Wait for deployment to be created

DEPLOYMENT_PID=$!
echo "---> For more info: https://$AWS_DEFAULT_REGION.console.aws.amazon.com/codesuite/codedeploy/deployments/$DEPLOYMENT_ID"

sleep 5 # Wait for deployment to be created so we can fetch DEPLOYMENT_ID next
/work/tail-ecs-events.py &
TAIL_ECS_EVENTS_PID=$!

DEPLOYMENT_ID=$(aws deploy list-deployments --application-name=$CLUSTER_NAME-$APP_NAME --deployment-group=$CLUSTER_NAME-$APP_NAME --max-items=1 --query="deployments[0]" --output=text | head -n 1)
RET=0

echo "---> For More Deployment info: https://$AWS_DEFAULT_REGION.console.aws.amazon.com/codesuite/codedeploy/deployments/$DEPLOYMENT_ID"
while [ "$(aws deploy get-deployment --deployment-id $DEPLOYMENT_ID --query deploymentInfo.status --output text)" == "Created" ]
do
sleep 1
done

echo "---> Waiting for Deployment ..."
echo "---> Deployment created!"

/work/tail-ecs-events.py &
TAIL_PID=$!
while [ "$(aws deploy get-deployment --deployment-id $DEPLOYMENT_ID --query deploymentInfo.status --output text)" == "InProgress" ]
do
sleep 1
done

wait $DEPLOYMENT_PID
RET=$?
TASK_SET_ID=$(aws ecs describe-services --cluster $CLUSTER_NAME --service $APP_NAME --query "services[0].taskSets[?status == 'ACTIVE'].id" --output text)
if [ "${TASK_SET_ID}" != "" ]; then
echo "---> Task Set ID: $TASK_SET_ID"
fi

# Due to a known CodeDeploy issue, a deployment fails if the replacement task set of the ECS service stays unhealthy/unstable for 5 minutes
# while the deployment is in the wait (Ready) status; as of today this 5-minute window is not configurable.
# For that reason we wait MAX_WAIT + MAX_WAIT_BUFFER seconds (6 minutes with the values below) before considering a deployment that is still in Ready status as successful.

WAIT_PERIOD=0
MAX_WAIT=300 #$(aws ecs describe-services --cluster $CLUSTER_NAME --service $APP_NAME --query services[0].healthCheckGracePeriodSeconds --output text)
MAX_WAIT_BUFFER=60

echo
echo
echo "---> Waiting $((MAX_WAIT + MAX_WAIT_BUFFER)) seconds for tasks to stabilise"
echo

while [ "$(aws deploy get-deployment --deployment-id $DEPLOYMENT_ID --query deploymentInfo.status --output text)" == "Ready" ]
do
if [ "$WAIT_PERIOD" -ge "$((MAX_WAIT + MAX_WAIT_BUFFER))" ]; then
break
fi
sleep 10
WAIT_PERIOD=$((WAIT_PERIOD + 10))
done

DEPLOYMENT_STATUS=$(aws deploy get-deployment --deployment-id $DEPLOYMENT_ID --query deploymentInfo.status --output text)
echo
echo "---> Deployment status: $DEPLOYMENT_STATUS"
echo

#######################################
# Translate the final CodeDeploy status into the script's exit code.
# For a failed deployment, also print the logs of a STOPPED task that was
# started by the task set, when one can be found.
# Globals:   CLUSTER_NAME (read), TASK_SET_ID (read), RET (written)
# Arguments: $1 - deployment status as returned by `aws deploy get-deployment`
# Outputs:   progress messages and task logs on stdout
# Returns:   0; the outcome is reported through RET. Any status other than
#            Failed, Stopped or Succeeded leaves RET untouched.
#######################################
resolve_deployment_result() {
  local status=$1
  # Kept local so the exported TASK_ARN (the task definition ARN) is not overwritten.
  local stopped_task_arn=""

  case "$status" in
    Failed)
      # Without a task set ID there is nothing to filter on; skip the lookup.
      if [[ -n "${TASK_SET_ID:-}" ]]; then
        stopped_task_arn=$(aws ecs list-tasks \
          --cluster "$CLUSTER_NAME" \
          --desired-status STOPPED \
          --started-by "$TASK_SET_ID" \
          --query 'taskArns[0]' \
          --output text) || stopped_task_arn=""
      fi
      # "None" is what the CLI prints when the query matches no task.
      if [[ -n "$stopped_task_arn" && "$stopped_task_arn" != "None" ]]; then
        echo "---> Displaying logs of STOPPED task: $stopped_task_arn"
        echo
        /work/tail-task-logs.py "$stopped_task_arn"
      fi
      RET=1
      ;;
    Stopped)
      RET=1
      ;;
    Succeeded)
      RET=0
      ;;
  esac
}

resolve_deployment_result "$DEPLOYMENT_STATUS"

if [ $RET -eq 0 ]; then
echo "---> Deployment completed!"
echo
echo "---> Completed!"
else
echo
echo "---> ERROR: Deployment FAILED!"
fi

kill $TAIL_PID
kill $TAIL_ECS_EVENTS_PID

exit $RET
exit $RET
2 changes: 1 addition & 1 deletion src/tail-ecs-events.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
events_collected.insert(0, event)

for event_collected in events_collected:
print('%s\t%s' % ('{0:%Y-%m-%d %H:%M:%S %z}'.format(event_collected['createdAt']), event_collected['message']))
print('%s %s' % ('{0:%Y-%m-%d %H:%M:%S %z}'.format(event_collected['createdAt']), event_collected['message']))

last_event = events[0]['id']
time.sleep(5)
Expand Down
65 changes: 36 additions & 29 deletions src/tail-task-logs.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,56 @@
#!/usr/bin/env python3

import boto3, json, time, os, datetime
import boto3, json, time, os, datetime, sys

aws_ecs = boto3.client('ecs')
logs = boto3.client('logs')

cluster_name=os.environ['CLUSTER_NAME']
app_name=os.environ['APP_NAME']
task_arn=os.environ['TASK_ID']
task_number=task_arn.split(":task/",1)[1] #get the task number id
task_arn=sys.argv[1]

task_id=task_arn.split(":task/",1)[1] #get the task number id
last_event = None
log_group_name='/ecs/'+cluster_name+'/'+app_name

extra_args = {
'logGroupName': log_group_name,
'logStreamName': app_name+'/'+app_name+'/'+task_id,
'startFromHead': True
}

while True:
try:
response = aws_ecs.describe_tasks(
cluster=cluster_name,
tasks=[task_arn])

logs = boto3.client('logs')
task_status = response['tasks'][0]['lastStatus']
print('Task status', task_status)
logGroupName='/ecs/'+cluster_name+'/'+app_name
print('Searching logs for ', logGroupName)
time.sleep(5)

logStreams = logs.describe_log_streams(
logGroupName=logGroupName,
logStreamNamePrefix=app_name+'/'+app_name+'/'+task_number,
limit=1,
descending=True)

for stream in logStreams['logStreams']:
streamName=stream['logStreamName']
print('log Streams', streamName)
logStreamEvents = logs.get_log_events(
logGroupName=logGroupName,
logStreamName=streamName,
startFromHead=True)
for log in logStreamEvents['events']:
print(log['message'])
if task_status == 'STOPPED':
break
time.sleep(5)

log_stream_events = logs.get_log_events(**extra_args)

for event in log_stream_events['events']:
print("%s" % (event['message']))

if 'nextToken' in extra_args and log_stream_events['nextForwardToken'] == extra_args['nextToken']:
if task_status == "STOPPED":
print("======== TASK STOPPED ========")
print("Task ID: %s" % task_id)
print("Task ARN: %s" % task_arn)
print("Service Name: %s" % app_name)
print("Cluster Name: %s" % cluster_name)
print("Started at: %s" % response['tasks'][0]['startedAt'])
print("Stopped at: %s" % response['tasks'][0]['stoppedAt'])
print("Stopped Reason: %s" % response['tasks'][0]['stoppedReason'])
if 'stopCode' in response['tasks'][0]:
print("Stop Code: %s" % response['tasks'][0]['stopCode'])
print("")
break
time.sleep(1)
else:
extra_args['nextToken'] = log_stream_events['nextForwardToken']

except Exception as e:
print("error: " + str(e))
print("Error: " + str(e))
break