Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 75 additions & 4 deletions cmd/setup-etcd-environment/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
Expand All @@ -14,7 +15,11 @@ import (
"github.com/golang/glog"
"github.com/openshift/machine-config-operator/pkg/version"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog"
)

var (
Expand All @@ -29,9 +34,23 @@ var (
discoverySRV string
ifName string
outputFile string
pivot bool
}
EtcdScalingAnnotationKey = "etcd.operator.openshift.io/scale"
)

// EtcdScaling is the payload stored (JSON-encoded) in the
// "etcd.operator.openshift.io/scale" annotation on the scaling-lock
// ConfigMap: the metadata of the member currently being scaled plus
// the cluster's existing members.
type EtcdScaling struct {
// Metadata identifies the member being added; its Name is compared
// against the local ETCD_NAME to find "self" during scale-up.
Metadata *metav1.ObjectMeta `json:"metadata,omitempty"`
// Members lists the already-running cluster members used to build
// the ETCD_INITIAL_CLUSTER environment value.
Members []Member `json:"members,omitempty"`
}

// Member describes a single etcd cluster member as serialized in the
// scaling annotation. JSON tags appear to mirror the etcd member wire
// format (note the uppercase "ID" tag) — confirm against the operator
// that writes the annotation.
type Member struct {
// ID is the member's unique identifier within the etcd cluster.
ID uint64 `json:"ID,omitempty"`
// Name is the member's human-readable name (matched against ETCD_NAME).
Name string `json:"name,omitempty"`
// PeerURLS holds the member's peer endpoints; only the first entry is
// used when composing the initial-cluster string.
PeerURLS []string `json:"peerURLs,omitempty"`
// ClientURLS holds the member's client-facing endpoints.
ClientURLS []string `json:"clientURLs,omitempty"`
}

func init() {
rootCmd.AddCommand(runCmd)
rootCmd.PersistentFlags().StringVar(&runOpts.discoverySRV, "discovery-srv", "", "DNS domain used to bootstrap initial etcd cluster.")
Expand Down Expand Up @@ -86,9 +105,54 @@ func runRunCmd(cmd *cobra.Command, args []string) error {
out = f
}

if err := writeEnvironmentFile(map[string]string{
"DISCOVERY_SRV": runOpts.discoverySRV,
}, out, true); err != nil {
export := make(map[string]string)
etcdName := os.Getenv("ETCD_NAME")
if etcdName == "" {
return fmt.Errorf("environment variable ETCD_NAME has no value")
}
var e EtcdScaling
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you use a more descriptive name than e? It's hard to remember what this stands for as I read through the code below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good.

if runOpts.pivot {
clientConfig, err := rest.InClusterConfig()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that it requires k8s networking to be up to talk to apiservers... That would be difficult in recovery scenarios

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This functionality is part of cluster bootstrap and relies on the operator so networking would be assumed. DR would require standing up a single node etcd cluster then scaling additional members via operator.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I will consider how to handle this for that single node. thanks.

if err != nil {
panic(err.Error())
}
client, err := kubernetes.NewForConfig(clientConfig)
if err != nil {
return fmt.Errorf("error creating client: %v", err)
}
duration := 10 * time.Second
// wait forever for success and retry every duration interval
wait.PollInfinite(duration, func() (bool, error) {
result, err := client.CoreV1().ConfigMaps("openshift-etcd").Get("scaling-lock", metav1.GetOptions{})
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least in this usage, "scaling-lock" seems misnamed.
Perhaps "known-members"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this configmap originally was a distributed lock similar to the leader election process. But I did not find that to be necessary will consider renaming.

if err != nil {
klog.Errorf("error creating client %v", err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wondering why you used klog here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see we have glog will drop.

return false, nil
}
if err := json.Unmarshal([]byte(result.Annotations[EtcdScalingAnnotationKey]), &e); err != nil {
klog.Errorf("error decoding result %v", err)
return false, nil
}
if e.Metadata.Name != etcdName {
klog.Errorf("could not find self in scaling-lock")
return false, nil
}
members := e.Members
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need this placeholder var? would using e.Members directly cause an issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point will adjust

if len(members) == 0 {
klog.Errorf("no members found in scaling-lock")
return false, nil
}
var memberList []string
for _, m := range members {
memberList = append(memberList, fmt.Sprintf("%s=%s", m.Name, m.PeerURLS[0]))
}
memberList = append(memberList, fmt.Sprintf("%s=https://%s:2380", etcdName, ip))
export["INITIAL_CLUSTER"] = strings.Join(memberList, ",")
return true, nil
})
} else {
export["DISCOVERY_SRV"] = runOpts.discoverySRV
}
if err := writeEnvironmentFile(export, out, true); err != nil {
return err
}

Expand Down Expand Up @@ -137,6 +201,9 @@ func reverseLookupSelf(service, proto, name, self string) (string, error) {
}
selfTarget := ""
for _, srv := range srvs {
if isPivot(srv.Target) {
runOpts.pivot = true
}
glog.V(4).Infof("checking against %s", srv.Target)
addrs, err := net.LookupHost(srv.Target)
if err != nil {
Expand All @@ -156,10 +223,14 @@ func reverseLookupSelf(service, proto, name, self string) (string, error) {
return selfTarget, nil
}

// isPivot reports whether the SRV target names the bootstrap etcd
// member, i.e. its hostname begins with "etcd-bootstrap".
func isPivot(target string) bool {
	const bootstrapPrefix = "etcd-bootstrap"
	if len(target) < len(bootstrapPrefix) {
		return false
	}
	return target[:len(bootstrapPrefix)] == bootstrapPrefix
}

func writeEnvironmentFile(m map[string]string, w io.Writer, export bool) error {
var buffer bytes.Buffer
for k, v := range m {
env := fmt.Sprintf("ETCD_%s=%s\n", k, v)
env := fmt.Sprintf("ETCD_%s=\"%s\"\n", k, v)
if export == true {
env = fmt.Sprintf("export %s", env)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ contents:
volumeMounts:
- name: discovery
mountPath: /run/etcd/
- name: sa
mountPath: /var/run/secrets/kubernetes.io/serviceaccount/
env:
- name: ETCD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: certs
image: "{{.Images.kubeClientAgentImageKey}}"
command:
Expand Down Expand Up @@ -105,6 +112,10 @@ contents:
--listen-client-urls=https://0.0.0.0:2379 \
--listen-peer-urls=https://0.0.0.0:2380 \
--listen-metrics-urls=https://0.0.0.0:9978 \
lifecycle:
postStart:
exec:
command: ["/bin/sh", "-c", "sleep 5"]
resources:
requests:
memory: 600Mi
Expand All @@ -129,6 +140,10 @@ contents:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: ETCD_INITIAL_CLUSTER
valueFrom:
fieldRef:
fieldPath: metadata.annotations['etcd.operator.openshift.io/scale']
ports:
- name: peer
containerPort: 2380
Expand Down Expand Up @@ -157,6 +172,10 @@ contents:
--cert-file /etc/ssl/etcd/system:etcd-metric:${ETCD_DNS_NAME}.crt \
--cacert /etc/ssl/etcd/ca.crt \
--trusted-ca-file /etc/ssl/etcd/metric-ca.crt \
lifecycle:
postStart:
exec:
command: ["/bin/sh", "-c", "sleep 5"]
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- name: discovery
Expand Down Expand Up @@ -191,3 +210,6 @@ contents:
- name: conf
hostPath:
path: /etc/etcd
- name: sa
hostPath:
path: /etc/kubernetes/static-pod-resources/etcd-member/secrets/kubernetes.io/sa-token