-
Notifications
You must be signed in to change notification settings - Fork 413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore(custom-resource): implement lambda function that validate certificate and add aliases for NLB #3057
chore(custom-resource): implement lambda function that validate certificate and add aliases for NLB #3057
Changes from all commits
1a4a0ad
bc11ad4
9478151
417a14f
972412c
9c7b646
323876e
7e6cae4
b4225e4
331e02b
9784c23
4868cc5
712157e
4a3c641
8b3ae5c
e089133
d604ff2
ccb1325
4bb9554
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,331 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
const AWS = require('aws-sdk'); | ||
const CRYPTO = require("crypto"); | ||
// Maximum number of polls while waiting for ACM to publish the DNS validation options.
const ATTEMPTS_VALIDATION_OPTIONS_READY = 10;

// Route 53 change waiter budget: 10 attempts x 30 seconds = up to 5 minutes.
const ATTEMPTS_RECORD_SETS_CHANGE = 10;
const DELAY_RECORD_SETS_CHANGE_IN_S = 30;

// ACM validation waiter budget: 19 attempts x 30 seconds = up to 9 minutes and 30 seconds.
const ATTEMPTS_CERTIFICATE_VALIDATED = 19;
const DELAY_CERTIFICATE_VALIDATED_IN_S = 30;

// Per-invocation state; every one of these is (re)assigned by the handler before use.
let acm;
let envRoute53;
let envHostedZoneID;
let appName;
let envName;
let serviceName;
let certificateDomain;

// Resolves after `ms` milliseconds. Kept separately so `reset` can restore it.
let defaultSleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

// Indirections used by the wait loop; `sleep` can be swapped through the exported hooks.
let sleep = defaultSleep;
let random = Math.random;
|
||
/**
 * Send a CloudFormation custom-resource response to the pre-signed URL carried in the event.
 *
 * @param {object} event the Lambda event payload received by the handler function
 * @param {object} context the Lambda context received by the handler function
 * @param {string} responseStatus the response status, either 'SUCCESS' or 'FAILED'
 * @param {string} physicalResourceId CloudFormation physical resource ID; falls back to the log stream name when empty
 * @param {object} [responseData] arbitrary response data object
 * @param {string} [reason] reason for failure, if any, to convey to the user
 * @returns {Promise} Promise that is resolved on success, or rejected on connection error or HTTP error response (status >= 400)
 */
function report(
  event,
  context,
  responseStatus,
  physicalResourceId,
  responseData,
  reason
) {
  return new Promise((resolve, reject) => {
    const https = require("https");
    const { URL } = require("url");

    // The log location is appended so that the user can find the Lambda logs from the stack event.
    const reasonWithLogInfo = `${reason} (Log: ${context.logGroupName}/${context.logStreamName})`;
    const responseBody = JSON.stringify({
      Status: responseStatus,
      Reason: reasonWithLogInfo,
      PhysicalResourceId: physicalResourceId || context.logStreamName,
      StackId: event.StackId,
      RequestId: event.RequestId,
      LogicalResourceId: event.LogicalResourceId,
      Data: responseData,
    });

    const parsedUrl = new URL(event.ResponseURL);
    const options = {
      hostname: parsedUrl.hostname,
      port: 443,
      path: parsedUrl.pathname + parsedUrl.search,
      method: "PUT",
      headers: {
        "Content-Type": "",
        // Content-Length counts bytes, not UTF-16 code units: the body is written as UTF-8 below,
        // so `responseBody.length` would under-report (and truncate) any payload with non-ASCII text.
        "Content-Length": Buffer.byteLength(responseBody, "utf8"),
      },
    };

    https
      .request(options)
      .on("error", reject)
      .on("response", (res) => {
        // Drain the response so that the socket is released.
        res.resume();
        if (res.statusCode >= 400) {
          reject(new Error(`Error ${res.statusCode}: ${res.statusMessage}`));
        } else {
          resolve();
        }
      })
      .end(responseBody, "utf8");
  });
}
|
||
exports.handler = async function (event, context) { | ||
const props = event.ResourceProperties; | ||
|
||
let {LoadBalancerDNS: loadBalancerDNS, | ||
LoadBalancerHostedZoneID: loadBalancerHostedZoneID, | ||
DomainName: domainName, | ||
} = props; | ||
const aliases = new Set(props.Aliases); | ||
|
||
acm = new AWS.ACM(); | ||
envRoute53 = new AWS.Route53(); | ||
envHostedZoneID = props.EnvHostedZoneId; | ||
envName = props.EnvName; | ||
appName = props.AppName; | ||
serviceName = props.ServiceName; | ||
certificateDomain = `${serviceName}-nlb.${envName}.${appName}.${domainName}`; | ||
|
||
// NOTE: If the aliases have changed, then we need to replace the certificate being used, as well as deleting/adding | ||
// validation records and A records. In general, any change in aliases indicate a "replacement" of the resources | ||
// managed by the custom resource lambda; on the contrary, the same set of aliases indicate that there is no need to | ||
// replace or update the certificate, nor the validation records or A records. Hence, we can use this as the physicalResourceID. | ||
let aliasesSorted = [...aliases].sort().join(","); | ||
const physicalResourceID = `/${serviceName}/${aliasesSorted}`; | ||
|
||
let handler = async function() { | ||
switch (event.RequestType) { | ||
case "Create": | ||
efekarakus marked this conversation as resolved.
Show resolved
Hide resolved
|
||
await validateAliases(aliases, loadBalancerDNS); | ||
const certificateARN = await requestCertificate({ | ||
aliases: aliases, | ||
idempotencyToken: CRYPTO | ||
.createHash("md5") | ||
.update(physicalResourceID) | ||
.digest("hex")}); | ||
const options = await waitForValidationOptionsToBeReady(certificateARN, aliases); | ||
await activate(options, certificateARN, loadBalancerDNS, loadBalancerHostedZoneID); | ||
break; | ||
case "Update": | ||
case "Delete": | ||
default: | ||
throw new Error(`Unsupported request type ${event.RequestType}`); | ||
} | ||
}; | ||
|
||
try { | ||
await Promise.race([exports.deadlineExpired(), handler(),]); | ||
await report(event, context, "SUCCESS", physicalResourceID); | ||
} catch (err) { | ||
console.log(`Caught error for service ${serviceName}: ${err.message}`); | ||
await report(event, context, "FAILED", physicalResourceID, null, err.message); | ||
} | ||
}; | ||
|
||
/**
 * Validate that the aliases are not in use by anything other than this service's load balancer.
 *
 * For every alias, the hosted zone is queried for the first record set at or after the alias name.
 * The alias is considered free when no record set is returned, or when the returned record set
 * belongs to a different name. It is considered valid when the record is an alias record that
 * already points to `loadBalancerDNS`.
 *
 * @param {Set<String>} aliases for the service.
 * @param {String} loadBalancerDNS the DNS of the service's load balancer.
 * @throws error if at least one of the aliases is not valid.
 */
async function validateAliases(aliases, loadBalancerDNS) {
  // DNS names are case-insensitive, and Route 53 returns them with a trailing dot.
  const normalize = (name) => String(name).toLowerCase().replace(/\.$/, "");
  const ownTarget = normalize(loadBalancerDNS);

  const checks = [...aliases].map(async (alias) => {
    const data = await envRoute53.listResourceRecordSets({
      HostedZoneId: envHostedZoneID,
      MaxItems: "1",
      StartRecordName: alias,
    }).promise();

    const recordSets = data["ResourceRecordSets"];
    if (!recordSets || recordSets.length === 0) {
      return;
    }

    const [record] = recordSets;
    // StartRecordName is only a starting point in the lexicographic listing: when no record with
    // that exact name exists, the next record in the zone is returned instead. Such a record does
    // not mean that the alias is taken.
    if (typeof record.Name === "string" && normalize(record.Name) !== normalize(alias)) {
      return;
    }

    const aliasTarget = record.AliasTarget;
    if (aliasTarget && normalize(aliasTarget.DNSName) === ownTarget) {
      return; // The record is an alias record and is in use by myself, hence valid.
    }

    if (aliasTarget) {
      throw new Error(`Alias ${alias} is already in use by ${aliasTarget.DNSName}. This could be another load balancer of a different service.`);
    }
    throw new Error(`Alias ${alias} is already in use`);
  });

  await Promise.all(checks);
}
|
||
/**
 * Requests a public certificate from AWS Certificate Manager, using DNS validation.
 * The certificate is issued for the module-level `certificateDomain`, with the aliases as
 * subject alternative names, and is tagged with the application, environment and service names.
 *
 * @param {Object} requestCertificateInput is the input to requestCertificate, containing the aliases and idempotencyToken.
 * @return {String} The ARN of the requested certificate.
 */
async function requestCertificate({ aliases, idempotencyToken }) {
  const tag = (Key, Value) => ({ Key, Value });

  // With no aliases, `null` is passed instead of an empty list.
  // NOTE(review): presumably because ACM rejects an empty SubjectAlternativeNames list; confirm against the API reference.
  const subjectAlternativeNames = aliases.size === 0 ? null : Array.from(aliases);

  const params = {
    DomainName: certificateDomain,
    IdempotencyToken: idempotencyToken,
    SubjectAlternativeNames: subjectAlternativeNames,
    Tags: [
      tag("copilot-application", appName),
      tag("copilot-environment", envName),
      tag("copilot-service", serviceName),
    ],
    ValidationMethod: "DNS",
  };

  const response = await acm.requestCertificate(params).promise();
  return response.CertificateArn;
}
|
||
/**
 * Poll ACM until every expected domain validation option carries its DNS resource record.
 *
 * @param {String} certificateARN ARN of the certificate to describe.
 * @param {Set<String>} aliases for the service.
 * @return {Array<Object>} the certificate's domain validation options, once they are all ready.
 * @throws error if the options are still not ready after ATTEMPTS_VALIDATION_OPTIONS_READY polls.
 */
async function waitForValidationOptionsToBeReady(certificateARN, aliases) {
  // Expect one validation option for each alias, plus one for the certificate domain itself.
  const expectedCount = aliases.size + 1;

  // An option counts once ACM has attached its resource record and it belongs to one of our domains.
  const isReady = (option) =>
    option.ResourceRecord && (aliases.has(option.DomainName) || option.DomainName === certificateDomain);

  // TODO: This wait loop could be further abstracted; per the review discussion this is left for
  // a follow-up change, because DELETE will need a similar wait loop.
  let attempt = 0;
  while (attempt < ATTEMPTS_VALIDATION_OPTIONS_READY) {
    const { Certificate } = await acm.describeCertificate({
      CertificateArn: certificateARN,
    }).promise();
    const options = Certificate.DomainValidationOptions || [];

    if (options.filter(isReady).length === expectedCount) {
      return options;
    }

    // Exponential backoff with jitter based on 200ms base
    // component of backoff fixed to ensure minimum total wait time on
    // slow targets.
    const base = Math.pow(2, attempt);
    await sleep(random() * base * 50 + base * 150);
    attempt += 1;
  }

  throw new Error(`resource validation records are not ready after ${attempt} tries`);
}
|
||
/**
 * Validate the certificate and insert the alias records.
 * All validation options are activated concurrently; afterwards the function waits for ACM to
 * report the certificate as validated.
 *
 * @param {Array<Object>} validationOptions
 * @param {String} certificateARN
 * @param {String} loadBalancerDNS
 * @param {String} loadBalancerHostedZone
 */
async function activate(validationOptions, certificateARN, loadBalancerDNS, loadBalancerHostedZone) {
  const activations = Array.from(validationOptions, (option) =>
    activateOption(option, loadBalancerDNS, loadBalancerHostedZone)
  );
  await Promise.all(activations);

  // Wait up to 9 minutes and 30 seconds
  const waiter = {
    delay: DELAY_CERTIFICATE_VALIDATED_IN_S,
    maxAttempts: ATTEMPTS_CERTIFICATE_VALIDATED,
  };
  await acm.waitFor("certificateValidated", {
    $waiter: waiter,
    CertificateArn: certificateARN,
  }).promise();
}
|
||
/**
 * Upsert the validation record for the option's domain, and also upsert an A (alias) record to
 * the load balancer when that domain is not the default certificate domain. Both changes are
 * submitted in one batch, and the function waits until Route 53 reports the batch as applied.
 *
 * @param {Object} option
 * @param {String} loadBalancerDNS
 * @param {String} loadBalancerHostedZone
 */
async function activateOption(option, loadBalancerDNS, loadBalancerHostedZone) {
  const upsert = (ResourceRecordSet) => ({ Action: "UPSERT", ResourceRecordSet });

  const changes = [
    upsert({
      Name: option.ResourceRecord.Name,
      Type: option.ResourceRecord.Type,
      TTL: 60,
      ResourceRecords: [
        {
          Value: option.ResourceRecord.Value,
        },
      ],
    }),
  ];

  const isAlias = option.DomainName !== certificateDomain;
  if (isAlias) {
    // It is validated that if the alias is in use, it is in use by the service itself.
    changes.push(
      upsert({
        Name: option.DomainName,
        Type: "A",
        AliasTarget: {
          DNSName: loadBalancerDNS,
          EvaluateTargetHealth: true,
          HostedZoneId: loadBalancerHostedZone,
        },
      })
    );
  }

  const { ChangeInfo: changeInfo } = await envRoute53.changeResourceRecordSets({
    ChangeBatch: {
      Comment: "Validate the certificate and create A record for the alias",
      Changes: changes,
    },
    HostedZoneId: envHostedZoneID,
  }).promise();

  // Wait up to 5 minutes
  const waiter = {
    delay: DELAY_RECORD_SETS_CHANGE_IN_S,
    maxAttempts: ATTEMPTS_RECORD_SETS_CHANGE,
  };
  await envRoute53.waitFor('resourceRecordSetsChanged', {
    $waiter: waiter,
    Id: changeInfo.Id,
  }).promise();
}
|
||
exports.deadlineExpired = function () { | ||
return new Promise(function (resolve, reject) { | ||
setTimeout( | ||
reject, | ||
14 * 60 * 1000 + 30 * 1000 /* 14.5 minutes*/, | ||
new Error(`Lambda took longer than 14.5 minutes to update custom domain`) | ||
); | ||
}); | ||
}; | ||
|
||
exports.withSleep = function (s) { | ||
sleep = s; | ||
}; | ||
exports.reset = function () { | ||
sleep = defaultSleep; | ||
}; | ||
exports.withDeadlineExpired = function (d) { | ||
exports.deadlineExpired = d; | ||
}; | ||
exports.attemptsValidationOptionsReady = ATTEMPTS_VALIDATION_OPTIONS_READY; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know if there is a limit on the length of the physical resource ID? Would you mind investigating that? I tried a quick search but couldn't find anything, so it might be worthwhile experimenting.
Using the aliases in the resource ID makes total sense, because we want to delete the old certificate if the alias changes, right? If so, can we add that as a short comment here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To answer 2.:
Yes. For each unique combination of `[service name + alias names]`, the [certificate (and its subject alternative names) + validation records + A records] needed should be the same. If there is no change in `[service name + alias names]`, then the certificate shouldn't change, and neither should the validation records or the A records; on the contrary, if a change is detected in `[service name + alias names]`, then a `replacement` surely needs to happen to at least one of the three resources.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Talked offline but I'm posting here so that this info is shared:
Answering 1.: There is virtually no limit on the Physical Resource ID length - I tried an 800-character-long ID, and it was fine. However, if the ID is too long, the response object sent from the lambda will be too big, causing the custom resource to fail.