From cc29530ba0a2ad2b004d50cdea203b4c2926f603 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sun, 29 Apr 2018 13:19:00 -0700 Subject: [PATCH] Allow preemptible workers on AWS via spot instances * Add `worker_price` to allow worker spot instances. Defaults to empty string for the worker autoscaling group to use regular on-demand instances. * Add `spot_price` to internal `workers` module for spot worker pools * Note: Unlike GCP `preemptible` workers, spot instances require you to pick a bid price. --- CHANGES.md | 9 ++++++++- aws/container-linux/kubernetes/variables.tf | 6 ++++++ aws/container-linux/kubernetes/workers.tf | 1 + aws/container-linux/kubernetes/workers/variables.tf | 6 ++++++ aws/container-linux/kubernetes/workers/workers.tf | 7 +++++++ aws/fedora-atomic/kubernetes/variables.tf | 6 ++++++ aws/fedora-atomic/kubernetes/workers.tf | 1 + aws/fedora-atomic/kubernetes/workers/variables.tf | 6 ++++++ aws/fedora-atomic/kubernetes/workers/workers.tf | 7 +++++++ docs/advanced/worker-pools.md | 3 ++- docs/atomic/aws.md | 1 + docs/cl/aws.md | 1 + 12 files changed, 52 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7c129e8ae..83086e62a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,14 @@ Notable changes between versions. -## Latest +#### AWS + +* Allow "preemptible" workers via spot instances ([#202](https://github.com/poseidon/typhoon/pull/202)) + * Add `worker_price` to allow worker spot instances. Defaults to empty string for the worker autoscaling group to use regular on-demand instances. + * Add `spot_price` to internal `workers` module for spot [worker pools](https://typhoon.psdn.io/advanced/worker-pools/) + * Note: Unlike GCP `preemptible` workers, spot instances require you to pick a bid price. + +## v1.10.2 * [Introduce](https://typhoon.psdn.io/announce/#april-26-2018) Typhoon for Fedora Atomic ([#199](https://github.com/poseidon/typhoon/pull/199)) * Kubernetes [v1.10.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1102) diff --git a/aws/container-linux/kubernetes/variables.tf b/aws/container-linux/kubernetes/variables.tf index 54b9f22ae..a3eb29521 100644 --- a/aws/container-linux/kubernetes/variables.tf +++ b/aws/container-linux/kubernetes/variables.tf @@ -59,6 +59,12 @@ variable "disk_type" { description = "Type of the EBS volume (e.g. standard, gp2, io1)" } +variable "worker_price" { + type = "string" + default = "" + description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320" +} + variable "controller_clc_snippets" { type = "list" description = "Controller Container Linux Config snippets" diff --git a/aws/container-linux/kubernetes/workers.tf b/aws/container-linux/kubernetes/workers.tf index aa7906088..39091c321 100644 --- a/aws/container-linux/kubernetes/workers.tf +++ b/aws/container-linux/kubernetes/workers.tf @@ -10,6 +10,7 @@ module "workers" { instance_type = "${var.worker_type}" os_channel = "${var.os_channel}" disk_size = "${var.disk_size}" + spot_price = "${var.worker_price}" # configuration kubeconfig = "${module.bootkube.kubeconfig}" diff --git a/aws/container-linux/kubernetes/workers/variables.tf b/aws/container-linux/kubernetes/workers/variables.tf index 5e3a52408..0208b4641 100644 --- a/aws/container-linux/kubernetes/workers/variables.tf +++ b/aws/container-linux/kubernetes/workers/variables.tf @@ -52,6 +52,12 @@ variable "disk_type" { description = "Type of the EBS volume (e.g. standard, gp2, io1)" } +variable "spot_price" { + type = "string" + default = "" + description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320" +} + variable "clc_snippets" { type = "list" description = "Container Linux Config snippets" diff --git a/aws/container-linux/kubernetes/workers/workers.tf b/aws/container-linux/kubernetes/workers/workers.tf index 833b4762f..fc6a6f7d7 100644 --- a/aws/container-linux/kubernetes/workers/workers.tf +++ b/aws/container-linux/kubernetes/workers/workers.tf @@ -26,6 +26,12 @@ resource "aws_autoscaling_group" "workers" { create_before_destroy = true } + # Waiting for instance creation delays adding the ASG to state. If instances + # can't be created (e.g. spot price too low), the ASG will be orphaned. + # Orphaned ASGs escape cleanup, can't be updated, and keep bidding if spot is + # used. Disable wait to avoid issues and align with other clouds. + wait_for_capacity_timeout = "0" + tags = [{ key = "Name" value = "${var.name}-worker" @@ -37,6 +43,7 @@ resource "aws_autoscaling_group" "workers" { resource "aws_launch_configuration" "worker" { image_id = "${data.aws_ami.coreos.image_id}" instance_type = "${var.instance_type}" + spot_price = "${var.spot_price}" user_data = "${data.ct_config.worker_ign.rendered}" diff --git a/aws/fedora-atomic/kubernetes/variables.tf b/aws/fedora-atomic/kubernetes/variables.tf index 0cb40ba79..3393618c4 100644 --- a/aws/fedora-atomic/kubernetes/variables.tf +++ b/aws/fedora-atomic/kubernetes/variables.tf @@ -53,6 +53,12 @@ variable "disk_type" { description = "Type of the EBS volume (e.g. standard, gp2, io1)" } +variable "worker_price" { + type = "string" + default = "" + description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320" +} + # configuration variable "ssh_authorized_key" { diff --git a/aws/fedora-atomic/kubernetes/workers.tf b/aws/fedora-atomic/kubernetes/workers.tf index 2f29e7c57..bdd00e80c 100644 --- a/aws/fedora-atomic/kubernetes/workers.tf +++ b/aws/fedora-atomic/kubernetes/workers.tf @@ -9,6 +9,7 @@ module "workers" { count = "${var.worker_count}" instance_type = "${var.worker_type}" disk_size = "${var.disk_size}" + spot_price = "${var.worker_price}" # configuration kubeconfig = "${module.bootkube.kubeconfig}" diff --git a/aws/fedora-atomic/kubernetes/workers/variables.tf b/aws/fedora-atomic/kubernetes/workers/variables.tf index 0d427a32b..de772f69a 100644 --- a/aws/fedora-atomic/kubernetes/workers/variables.tf +++ b/aws/fedora-atomic/kubernetes/workers/variables.tf @@ -46,6 +46,12 @@ variable "disk_type" { description = "Type of the EBS volume (e.g. standard, gp2, io1)" } +variable "spot_price" { + type = "string" + default = "" + description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320" +} + # configuration variable "kubeconfig" { diff --git a/aws/fedora-atomic/kubernetes/workers/workers.tf b/aws/fedora-atomic/kubernetes/workers/workers.tf index 05b1516bf..cdb428630 100644 --- a/aws/fedora-atomic/kubernetes/workers/workers.tf +++ b/aws/fedora-atomic/kubernetes/workers/workers.tf @@ -26,6 +26,12 @@ resource "aws_autoscaling_group" "workers" { create_before_destroy = true } + # Waiting for instance creation delays adding the ASG to state. If instances + # can't be created (e.g. spot price too low), the ASG will be orphaned. + # Orphaned ASGs escape cleanup, can't be updated, and keep bidding if spot is + # used. Disable wait to avoid issues and align with other clouds. + wait_for_capacity_timeout = "0" + tags = [{ key = "Name" value = "${var.name}-worker" @@ -37,6 +43,7 @@ resource "aws_autoscaling_group" "workers" { resource "aws_launch_configuration" "worker" { image_id = "${data.aws_ami.fedora.image_id}" instance_type = "${var.instance_type}" + spot_price = "${var.spot_price}" user_data = "${data.template_file.worker-cloudinit.rendered}" diff --git a/docs/advanced/worker-pools.md b/docs/advanced/worker-pools.md index 77d5651bb..081c18f26 100644 --- a/docs/advanced/worker-pools.md +++ b/docs/advanced/worker-pools.md @@ -68,10 +68,11 @@ The AWS internal `workers` module supports a number of [variables](https://githu | instance_type | EC2 instance type | "t2.small" | "t2.medium" | | os_channel | Container Linux AMI channel | stable| "beta", "alpha" | | disk_size | Size of the disk in GB | 40 | 100 | +| spot_price | Spot price in USD for workers. Leave as default empty string for regular on-demand instances | "" | "0.10" | | service_cidr | Must match `service_cidr` of cluster | "10.3.0.0/16" | "10.3.0.0/24" | | cluster_domain_suffix | Must match `cluster_domain_suffix` of cluster | "cluster.local" | "k8s.example.com" | -Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/). +Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/) or per-region and per-type [spot prices](https://aws.amazon.com/ec2/spot/pricing/). ## Google Cloud diff --git a/docs/atomic/aws.md b/docs/atomic/aws.md index c8566ac67..586df5540 100644 --- a/docs/atomic/aws.md +++ b/docs/atomic/aws.md @@ -227,6 +227,7 @@ Reference the DNS zone id with `"${aws_route53_zone.zone-for-clusters.zone_id}"` | worker_type | EC2 instance type for workers | "t2.small" | See below | | disk_size | Size of the EBS volume in GB | "40" | "100" | | disk_type | Type of the EBS volume | "gp2" | standard, gp2, io1 | +| worker_price | Spot price in USD for workers. Leave as default empty string for regular on-demand instances | "" | "0.10" | | networking | Choice of networking provider | "calico" | "calico" or "flannel" | | network_mtu | CNI interface MTU (calico only) | 1480 | 8981 | | host_cidr | CIDR IPv4 range to assign to EC2 instances | "10.0.0.0/16" | "10.1.0.0/16" | diff --git a/docs/cl/aws.md b/docs/cl/aws.md index 58c1bb32c..ec3496f9b 100644 --- a/docs/cl/aws.md +++ b/docs/cl/aws.md @@ -244,6 +244,7 @@ Reference the DNS zone id with `"${aws_route53_zone.zone-for-clusters.zone_id}"` | os_channel | Container Linux AMI channel | stable | stable, beta, alpha | | disk_size | Size of the EBS volume in GB | "40" | "100" | | disk_type | Type of the EBS volume | "gp2" | standard, gp2, io1 | +| worker_price | Spot price in USD for workers. Leave as default empty string for regular on-demand instances | "" | "0.10" | | controller_clc_snippets | Controller Container Linux Config snippets | [] | | | worker_clc_snippets | Worker Container Linux Config snippets | [] | | | networking | Choice of networking provider | "calico" | "calico" or "flannel" |