Skip to content
This repository was archived by the owner on Feb 5, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config.tf
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ variable "tectonic_container_images" {
awscli = "quay.io/coreos/awscli:025a357f05242fdad6a81e8a6b520098aa65a600"
gcloudsdk = "google/cloud-sdk:178.0.0-alpine"
bootkube = "quay.io/coreos/bootkube:v0.10.0"
tnc_bootstrap = "quay.io/coreos/tectonic-node-controller-dev:76a584680b7f39aa7b3c40cd742c736b30b5a89a"
tnc_bootstrap = "quay.io/coreos/tectonic-node-controller-bootstrap-dev:f6d5e710a97a8cd6f4cd2963f4426131f854a869"
etcd = "quay.io/coreos/etcd:v3.2.14"
hyperkube = "quay.io/coreos/hyperkube:v1.9.1_coreos.0"
kube_core_renderer = "quay.io/coreos/kube-core-renderer-dev:4ed85ee12e167da71e7d5f06ffdb94d1ce21f540"
Expand Down
3 changes: 3 additions & 0 deletions installer/pkg/workflow/destroy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ func destroyAssetsStep(m *metadata) error {
}

// destroyBootstrapStep destroys the etcd step and then the bootstrap step for
// the cluster directory held in m. It stops at the first failure, so the
// bootstrap step is only destroyed once the etcd step has been destroyed
// successfully.
func destroyBootstrapStep(m *metadata) error {
	// etcd goes first; presumably its resources depend on what the bootstrap
	// step created — confirm against the step definitions.
	err := runDestroyStep(m.clusterDir, etcdStep)
	if err != nil {
		return err
	}

	return runDestroyStep(m.clusterDir, bootstrapStep)
}

Expand Down
6 changes: 4 additions & 2 deletions installer/pkg/workflow/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,13 @@ func installBootstrapStep(m *metadata) error {
return err
}

if err := waitForTNC(m); err != nil {
destroyCNAME(m.clusterDir)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ignition does not keep retrying the download forever, so I think this introduces a race. What if one of the etcd machines gets provisioned and tries to load its ignition before the TNC is up?

I think this should go after the waitForTNC step.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there's a race, which currently relies only on retries. The problem here is that there's no obvious way for etcd to waitForTNC (when this is running as a static pod), as the kubeclient won't be able to get answers until the server gets the state.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I second @squat's concern about the race condition. If we know there could be one, we should try to find a solution.
I don't have enough insight into the matter right now to suggest one here, but let me know if you need my help looking into it.

Copy link
Contributor Author

@enxebre enxebre Mar 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

@enxebre enxebre Mar 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alexsomesan @squat it's verified that Ignition will never continue if it isn't completely successful.
waitForTNC(m), where it is now, will ensure that the step holds until etcd comes up (so the cluster gets state, so the TNC daemonset gets deployed, and so the api server actually gives a response, so waitForTNC can finish).
This is not strictly necessary, as every node will be able to get its config from the TNC pod, so the question is: do we still want to waitForTNC(m)? wdyt?

if err := runInstallStep(m.clusterDir, etcdStep); err != nil {
return err
}

return destroyCNAME(m.clusterDir)
return waitForTNC(m)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By having it here we ensure that the step holds until etcd comes up (so the api server actually gives a response and waitForTNC finishes when the TNC daemonset gets deployed).

}

func installJoinStep(m *metadata) error {
Expand Down
1 change: 1 addition & 0 deletions installer/pkg/workflow/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (
stepsBaseDir = "steps"
assetsStep = "assets"
bootstrapStep = "bootstrap"
etcdStep = "etcd"
joinStep = "joining"
configFileName = "config.yaml"
internalFileName = "internal.yaml"
Expand Down
11 changes: 9 additions & 2 deletions modules/aws/etcd/ignition_s3.tf → modules/aws/etcd/ignition.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@ locals {
ignition_etcd_keys = ["ignition_etcd_0.json", "ignition_etcd_1.json", "ignition_etcd_2.json"]
}

data "ignition_config" "s3" {
data "ignition_config" "tnc" {
count = "${length(var.external_endpoints) == 0 ? var.instance_count : 0}"

replace {
append {
source = "${format("http://${var.cluster_name}-tnc.${var.base_domain}/ignition?role=etcd&etcd_index=%d", count.index)}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:p it’s a little funny doing both interpolation and formatting but not a blocker


# TODO: add verification
}

# Used for loading certificates
append {
source = "${format("s3://%s/%s", var.s3_bucket, local.ignition_etcd_keys[count.index])}"

# TODO: add verification
Expand Down
2 changes: 1 addition & 1 deletion modules/aws/etcd/nodes.tf
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ resource "aws_instance" "etcd_node" {
instance_type = "${var.ec2_type}"
key_name = "${var.ssh_key}"
subnet_id = "${element(var.subnets, count.index)}"
user_data = "${data.ignition_config.s3.*.rendered[count.index]}"
user_data = "${data.ignition_config.tnc.*.rendered[count.index]}"
vpc_security_group_ids = ["${var.sg_ids}"]

lifecycle {
Expand Down
48 changes: 29 additions & 19 deletions modules/bootkube/assets.tf
Original file line number Diff line number Diff line change
Expand Up @@ -159,30 +159,40 @@ resource "local_file" "tnc_pod_config" {
filename = "./generated/tnc-config"
}

# Renders one "<endpoint>=https://<endpoint>:2380" entry per element of
# var.etcd_endpoints. The rendered entries are joined with commas to build the
# --initial-cluster flag (etcd_initial_cluster_list) passed to the tnc_config
# template.
data "template_file" "initial_cluster" {
count = "${length(var.etcd_endpoints)}"
template = "${var.etcd_endpoints[count.index]}=https://${var.etcd_endpoints[count.index]}:2380"
}

data "template_file" "tnc_config" {
template = "${file("${path.module}/resources/tnc-config")}"

vars {
cloud_provider_config = "${var.cloud_provider_config}"

http_proxy = "${var.http_proxy}"
https_proxy = "${var.https_proxy}"
no_proxy = "${join(",", var.no_proxy)}"
kubelet_image_url = "${replace(var.container_images["hyperkube"],var.image_re,"$1")}"
kubelet_image_tag = "${replace(var.container_images["hyperkube"],var.image_re,"$2")}"
iscsi_enabled = "${var.iscsi_enabled}"
kubeconfig_fetch_cmd = "${var.kubeconfig_fetch_cmd != "" ? "ExecStartPre=${var.kubeconfig_fetch_cmd}" : ""}"
tectonic_torcx_image_url = "${replace(var.container_images["tectonic_torcx"],var.image_re,"$1")}"
tectonic_torcx_image_tag = "${replace(var.container_images["tectonic_torcx"],var.image_re,"$2")}"
torcx_skip_setup = "false"
torcx_store_url = "${var.torcx_store_url}"
bootstrap_upgrade_cl = "${var.bootstrap_upgrade_cl}"
master_node_label = "${var.kubelet_master_node_label}"
worker_node_label = "${var.kubelet_worker_node_label}"
node_taints_param = "${var.kubelet_node_taints != "" ? "--register-with-taints=${var.kubelet_node_taints}" : ""}"
cluster_dns_ip = "${var.kube_dns_service_ip}"
cloud_provider = "${var.cloud_provider}"
debug_config = "${var.kubelet_debug_config}"
cluster_name = "${var.cluster_name}"
http_proxy = "${var.http_proxy}"
https_proxy = "${var.https_proxy}"
no_proxy = "${join(",", var.no_proxy)}"
kubelet_image_url = "${replace(var.container_images["hyperkube"],var.image_re,"$1")}"
kubelet_image_tag = "${replace(var.container_images["hyperkube"],var.image_re,"$2")}"
iscsi_enabled = "${var.iscsi_enabled}"
kubeconfig_fetch_cmd = "${var.kubeconfig_fetch_cmd != "" ? "ExecStartPre=${var.kubeconfig_fetch_cmd}" : ""}"
tectonic_torcx_image_url = "${replace(var.container_images["tectonic_torcx"],var.image_re,"$1")}"
tectonic_torcx_image_tag = "${replace(var.container_images["tectonic_torcx"],var.image_re,"$2")}"
torcx_skip_setup = "false"
torcx_store_url = "${var.torcx_store_url}"
bootstrap_upgrade_cl = "${var.bootstrap_upgrade_cl}"
master_node_label = "${var.kubelet_master_node_label}"
worker_node_label = "${var.kubelet_worker_node_label}"
node_taints_param = "${var.kubelet_node_taints != "" ? "--register-with-taints=${var.kubelet_node_taints}" : ""}"
cluster_dns_ip = "${var.kube_dns_service_ip}"
cloud_provider = "${var.cloud_provider}"
debug_config = "${var.kubelet_debug_config}"
cluster_name = "${var.cluster_name}"
base_domain = "${var.base_domain}"
etcd_initial_cluster_list = "${length(var.etcd_endpoints) > 0 ? format("--initial-cluster=%s", join(",", data.template_file.initial_cluster.*.rendered)) : ""}"
etcd_image = "${var.container_images["etcd"]}"
etcd_metadata_env = "${var.etcd_metadata_env}"
etcd_metadata_deps = "${var.etcd_metadata_deps}"
}
}
5 changes: 5 additions & 0 deletions modules/bootkube/resources/tnc-config
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ CloudProvider: "${cloud_provider}"
CloudProviderConfig: "${cloud_provider_config}"
DebugConfig: "${debug_config}"
ClusterName: "${cluster_name}"
BaseDomain: "${base_domain}"
EtcdInitialCluster: "${etcd_initial_cluster_list}"
EtcdImage: "${etcd_image}"
EtcdMetadataEnv: "${etcd_metadata_env}"
EtcdMetadataDeps: "${etcd_metadata_deps}"
15 changes: 15 additions & 0 deletions modules/bootkube/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,18 @@ EOF

type = "string"
}

# Cluster base domain; rendered into the TNC config template as base_domain
# (the BaseDomain key).
variable "base_domain" {
type = "string"
description = "The cluster base domain"
}

# Metadata environment directive for etcd. It is passed unchanged to the TNC
# config template, where it fills the EtcdMetadataEnv key.
variable "etcd_metadata_env" {
  type        = "string"
  description = "Metadata environment directive for etcd, rendered into the TNC config as EtcdMetadataEnv (for example EnvironmentFile=/run/metadata/coreos)"
}

# Metadata service dependency directives for etcd. It is passed unchanged to
# the TNC config template, where it fills the EtcdMetadataDeps key.
variable "etcd_metadata_deps" {
  type        = "string"
  description = "Metadata service dependency directives for etcd, rendered into the TNC config as EtcdMetadataDeps (for example Requires=coreos-metadata.service and After=coreos-metadata.service)"
}
8 changes: 0 additions & 8 deletions modules/dns/route53/etcd.tf

This file was deleted.

9 changes: 0 additions & 9 deletions modules/dns/route53/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
output "etcd_a_nodes" {
value = "${aws_route53_record.etcd_a_nodes.*.fqdn}"
}

# We have to do this join() & split() 'trick' because the ternary operator can't output lists.
output "etcd_endpoints" {
value = ["${split(",", length(var.external_endpoints) == 0 ? join(",", aws_route53_record.etcd_a_nodes.*.fqdn) : join(",", var.external_endpoints))}"]
}

output "worker_nodes" {
value = "${aws_route53_record.worker_nodes.*.fqdn}"
}
Expand Down
5 changes: 0 additions & 5 deletions modules/dns/route53/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ variable "etcd_count" {
type = "string"
}

variable "etcd_ip_addresses" {
description = "List of string IPs for etcd nodes"
type = "list"
}

variable "master_ip_addresses" {
description = "List of string IPs for masters"
type = "list"
Expand Down
34 changes: 1 addition & 33 deletions steps/assets/ignition-etcd.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,5 @@ locals {
data "ignition_config" "etcd" {
count = "${local.etcd_instance_count}"

systemd = [
"${data.ignition_systemd_unit.locksmithd.*.id[count.index]}",
"${module.ignition_bootstrap.etcd_dropin_id_list[count.index]}",
]

files = ["${compact(list(
module.ignition_bootstrap.profile_env_id,
module.ignition_bootstrap.systemd_default_env_id,
))}",
"${module.ignition_bootstrap.etcd_crt_id_list}",
]
}

data "ignition_systemd_unit" "locksmithd" {
count = "${local.etcd_instance_count}"

name = "locksmithd.service"
enabled = true

dropin = [
{
name = "40-etcd-lock.conf"

content = <<EOF
[Service]
Environment=REBOOT_STRATEGY=etcd-lock
Environment="LOCKSMITHD_ETCD_CAFILE=/etc/ssl/etcd/ca.crt"
Environment="LOCKSMITHD_ETCD_KEYFILE=/etc/ssl/etcd/client.key"
Environment="LOCKSMITHD_ETCD_CERTFILE=/etc/ssl/etcd/client.crt"
Environment="LOCKSMITHD_ENDPOINT=https://${var.tectonic_cluster_name}-etcd-${count.index}.${var.tectonic_base_domain}:2380"
EOF
},
]
files = ["${module.ignition_bootstrap.etcd_crt_id_list}"]
}
7 changes: 7 additions & 0 deletions steps/assets/tectonic.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ module "bootkube" {
kube_dns_service_ip = "${module.bootkube.kube_dns_service_ip}"
kubelet_master_node_label = "node-role.kubernetes.io/master"
kubelet_worker_node_label = "node-role.kubernetes.io/worker"
base_domain = "${var.tectonic_base_domain}"
etcd_metadata_env = "EnvironmentFile=/run/metadata/coreos"

etcd_metadata_deps = <<EOF
Requires=coreos-metadata.service
After=coreos-metadata.service
EOF
}

module "tectonic" {
Expand Down
1 change: 0 additions & 1 deletion steps/bootstrap/inputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,4 @@ data "terraform_remote_state" "assets" {
locals {
kubeconfig_kubelet_content = "${data.terraform_remote_state.assets.kubeconfig_kubelet_content}"
ignition_bootstrap = "${data.terraform_remote_state.assets.ignition_bootstrap}"
ignition_etcd = "${data.terraform_remote_state.assets.ignition_etcd}"
}
25 changes: 0 additions & 25 deletions steps/bootstrap/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -47,30 +47,6 @@ module "vpc" {
public_master_endpoints = "${var.tectonic_aws_public_endpoints}"
}

module "etcd" {
source = "../../modules/aws/etcd"

base_domain = "${var.tectonic_base_domain}"
cluster_id = "${var.tectonic_cluster_id}"
cluster_name = "${var.tectonic_cluster_name}"
container_image = "${var.tectonic_container_images["etcd"]}"
container_linux_channel = "${var.tectonic_container_linux_channel}"
container_linux_version = "${module.container_linux.version}"
ec2_type = "${var.tectonic_aws_etcd_ec2_type}"
external_endpoints = "${compact(var.tectonic_etcd_servers)}"
extra_tags = "${var.tectonic_aws_extra_tags}"
instance_count = "${length(data.template_file.etcd_hostname_list.*.id)}"
root_volume_iops = "${var.tectonic_aws_etcd_root_volume_iops}"
root_volume_size = "${var.tectonic_aws_etcd_root_volume_size}"
root_volume_type = "${var.tectonic_aws_etcd_root_volume_type}"
s3_bucket = "${aws_s3_bucket.tectonic.bucket}"
sg_ids = "${concat(var.tectonic_aws_etcd_extra_sg_ids, list(module.vpc.etcd_sg_id))}"
ssh_key = "${var.tectonic_aws_ssh_key}"
subnets = "${module.vpc.worker_subnet_ids}"
etcd_iam_role = "${var.tectonic_aws_etcd_iam_role_name}"
ec2_ami = "${var.tectonic_aws_ec2_ami_override}"
}

module "masters" {
source = "../../modules/aws/master-asg"

Expand Down Expand Up @@ -139,7 +115,6 @@ module "dns" {
custom_dns_name = "${var.tectonic_dns_name}"
elb_alias_enabled = true
etcd_count = "${length(data.template_file.etcd_hostname_list.*.id)}"
etcd_ip_addresses = "${module.etcd.ip_addresses}"
external_endpoints = ["${compact(var.tectonic_etcd_servers)}"]
master_count = "${var.tectonic_master_count}"
tectonic_external_private_zone = "${join("", aws_route53_zone.tectonic_int.*.zone_id)}"
Expand Down
17 changes: 17 additions & 0 deletions steps/bootstrap/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Etcd
# Exposes the Container Linux version resolved by the container_linux module
# so it can be read from this step's state (presumably by the etcd step —
# confirm against its inputs).
output "container_linux_version" {
value = "${module.container_linux.version}"
}

# Number of etcd instances, taken as the length of the rendered
# etcd_hostname_list template data source.
output "etcd_instance_count" {
value = "${length(data.template_file.etcd_hostname_list.*.id)}"
}

# ID of the etcd security group reported by the vpc module.
output "etcd_sg_id" {
value = "${module.vpc.etcd_sg_id}"
}

# Name of the aws_s3_bucket.tectonic bucket.
output "s3_bucket" {
value = "${aws_s3_bucket.tectonic.bucket}"
}

# Masters
output "aws_launch_configuration_masters" {
value = "${module.masters.aws_launch_configuration}"
Expand Down
16 changes: 0 additions & 16 deletions steps/bootstrap/s3-assets.tf
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,3 @@ resource "aws_s3_bucket_object" "ignition_bootstrap" {
"tectonicClusterID", "${var.tectonic_cluster_id}"
), var.tectonic_aws_extra_tags)}"
}

resource "aws_s3_bucket_object" "ignition_etcd" {
count = "${length(data.template_file.etcd_hostname_list.*.id)}"
bucket = "${aws_s3_bucket.tectonic.bucket}"
key = "ignition_etcd_${count.index}.json"
content = "${local.ignition_etcd[count.index]}"
acl = "private"

server_side_encryption = "AES256"

tags = "${merge(map(
"Name", "${var.tectonic_cluster_name}-ignition-etcd-${count.index}",
"KubernetesCluster", "${var.tectonic_cluster_name}",
"tectonicClusterID", "${var.tectonic_cluster_id}"
), var.tectonic_aws_extra_tags)}"
}
1 change: 1 addition & 0 deletions steps/etcd/config.tf
59 changes: 59 additions & 0 deletions steps/etcd/etcd.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# AWS provider for the etcd step, pinned to provider version 1.8.0. Region and
# profile come from the tectonic_aws_* variables.
provider "aws" {
  version = "1.8.0"
  profile = "${var.tectonic_aws_profile}"
  region  = "${var.tectonic_aws_region}"

  assume_role {
    # Passing the variable straight through yields an empty role_arn when no
    # installer role is configured, and the configured role otherwise.
    role_arn     = "${var.tectonic_aws_installer_role}"
    session_name = "TECTONIC_INSTALLER_${var.tectonic_cluster_name}"
  }
}

# Uploads one Ignition JSON object per etcd instance to the cluster S3 bucket.
# Objects are private and stored with AES256 server-side encryption.
# local.instance_count, local.s3_bucket and local.ignition_etcd are defined
# outside this file.
resource "aws_s3_bucket_object" "ignition_etcd" {
count = "${local.instance_count}"
bucket = "${local.s3_bucket}"
# Key is indexed per instance: ignition_etcd_<index>.json.
key = "ignition_etcd_${count.index}.json"
content = "${local.ignition_etcd[count.index]}"
acl = "private"

server_side_encryption = "AES256"

# Standard cluster tags merged with any user-supplied extra tags.
tags = "${merge(map(
"Name", "${var.tectonic_cluster_name}-ignition-etcd-${count.index}",
"KubernetesCluster", "${var.tectonic_cluster_name}",
"tectonicClusterID", "${var.tectonic_cluster_id}"
), var.tectonic_aws_extra_tags)}"
}

# Instantiates the shared AWS etcd module for this step. Values read from
# local.* (container_linux_version, instance_count, s3_bucket, sg_id,
# subnet_ids_workers) are defined outside this file — presumably sourced from
# the bootstrap step's outputs; confirm against this step's inputs.
module "etcd" {
source = "../../modules/aws/etcd"

base_domain = "${var.tectonic_base_domain}"
cluster_id = "${var.tectonic_cluster_id}"
cluster_name = "${var.tectonic_cluster_name}"
container_image = "${var.tectonic_container_images["etcd"]}"
container_linux_channel = "${var.tectonic_container_linux_channel}"
container_linux_version = "${local.container_linux_version}"
ec2_type = "${var.tectonic_aws_etcd_ec2_type}"
# compact() drops empty strings from the configured external etcd servers.
external_endpoints = "${compact(var.tectonic_etcd_servers)}"
extra_tags = "${var.tectonic_aws_extra_tags}"
instance_count = "${local.instance_count}"
root_volume_iops = "${var.tectonic_aws_etcd_root_volume_iops}"
root_volume_size = "${var.tectonic_aws_etcd_root_volume_size}"
root_volume_type = "${var.tectonic_aws_etcd_root_volume_type}"
s3_bucket = "${local.s3_bucket}"
# User-supplied extra security groups plus the etcd security group.
sg_ids = "${concat(var.tectonic_aws_etcd_extra_sg_ids, list(local.sg_id))}"
ssh_key = "${var.tectonic_aws_ssh_key}"
subnets = ["${local.subnet_ids_workers}"]
etcd_iam_role = "${var.tectonic_aws_etcd_iam_role_name}"
ec2_ami = "${var.tectonic_aws_ec2_ami_override}"
}

# Creates one A record per etcd instance, named <cluster_name>-etcd-<index>,
# in the zone given by local.private_zone_id, pointing at the matching IP
# address reported by the etcd module.
# NOTE(review): assumes module.etcd.ip_addresses has at least
# local.instance_count entries — confirm for the external-endpoints case.
resource "aws_route53_record" "etcd_a_nodes" {
count = "${local.instance_count}"
type = "A"
ttl = "60"
zone_id = "${local.private_zone_id}"
name = "${var.tectonic_cluster_name}-etcd-${count.index}"
records = ["${module.etcd.ip_addresses[count.index]}"]
}
Loading