From 696ef214ae628b86689998f1182eb8e0c65a43f7 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sat, 7 Apr 2018 23:16:27 -0700 Subject: [PATCH] Return Prometheus deployment to be a worker workload * Expose etcd metrics to workers so Prometheus can run on a worker, rather than a controller * Drop temporary firewall rules allowing Prometheus to run on a controller and scrape targets * Related to https://github.com/poseidon/typhoon/pull/175 --- addons/prometheus/deployment.yaml | 6 ---- aws/container-linux/kubernetes/security.tf | 30 +++++++------------ .../container-linux/kubernetes/network.tf | 16 +++++++++- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/addons/prometheus/deployment.yaml b/addons/prometheus/deployment.yaml index f416de4ae..82c6981d7 100644 --- a/addons/prometheus/deployment.yaml +++ b/addons/prometheus/deployment.yaml @@ -15,12 +15,6 @@ spec: name: prometheus phase: prod spec: - nodeSelector: - node-role.kubernetes.io/master: "" - tolerations: - - key: node-role.kubernetes.io/master - operator: Exists - effect: NoSchedule serviceAccountName: prometheus containers: - name: prometheus diff --git a/aws/container-linux/kubernetes/security.tf b/aws/container-linux/kubernetes/security.tf index 79fa1cc72..9c729c954 100644 --- a/aws/container-linux/kubernetes/security.tf +++ b/aws/container-linux/kubernetes/security.tf @@ -51,6 +51,16 @@ resource "aws_security_group_rule" "controller-etcd" { self = true } +resource "aws_security_group_rule" "controller-etcd-metrics" { + security_group_id = "${aws_security_group.controller.id}" + + type = "ingress" + protocol = "tcp" + from_port = 2381 + to_port = 2381 + source_security_group_id = "${aws_security_group.worker.id}" +} + resource "aws_security_group_rule" "controller-flannel" { security_group_id = "${aws_security_group.controller.id}" @@ -81,16 +91,6 @@ resource "aws_security_group_rule" "controller-node-exporter" { source_security_group_id = "${aws_security_group.worker.id}" } -resource 
"aws_security_group_rule" "controller-node-exporter-self" { - security_group_id = "${aws_security_group.controller.id}" - - type = "ingress" - protocol = "tcp" - from_port = 9100 - to_port = 9100 - self = true -} - resource "aws_security_group_rule" "controller-kubelet-self" { security_group_id = "${aws_security_group.controller.id}" @@ -266,16 +266,6 @@ resource "aws_security_group_rule" "worker-flannel-self" { resource "aws_security_group_rule" "worker-node-exporter" { security_group_id = "${aws_security_group.worker.id}" - type = "ingress" - protocol = "tcp" - from_port = 9100 - to_port = 9100 - source_security_group_id = "${aws_security_group.controller.id}" -} - -resource "aws_security_group_rule" "worker-node-exporter-self" { - security_group_id = "${aws_security_group.worker.id}" - type = "ingress" protocol = "tcp" from_port = 9100 diff --git a/google-cloud/container-linux/kubernetes/network.tf b/google-cloud/container-linux/kubernetes/network.tf index 74b07ab34..619faa8b0 100644 --- a/google-cloud/container-linux/kubernetes/network.tf +++ b/google-cloud/container-linux/kubernetes/network.tf @@ -56,6 +56,20 @@ resource "google_compute_firewall" "internal-etcd" { target_tags = ["${var.cluster_name}-controller"] } +# Allow Prometheus to scrape etcd metrics +resource "google_compute_firewall" "internal-etcd-metrics" { + name = "${var.cluster_name}-internal-etcd-metrics" + network = "${google_compute_network.network.name}" + + allow { + protocol = "tcp" + ports = [2381] + } + + source_tags = ["${var.cluster_name}-worker"] + target_tags = ["${var.cluster_name}-controller"] +} + # Calico BGP and IPIP # https://docs.projectcalico.org/v2.5/reference/public-cloud/gce resource "google_compute_firewall" "internal-calico" { @@ -103,7 +117,7 @@ resource "google_compute_firewall" "internal-node-exporter" { ports = [9100] } - source_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] + source_tags = ["${var.cluster_name}-worker"] target_tags = 
["${var.cluster_name}-controller", "${var.cluster_name}-worker"] }