diff --git a/go.mod b/go.mod index 8b76914007..63484e552c 100644 --- a/go.mod +++ b/go.mod @@ -50,6 +50,7 @@ require ( go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/net v0.0.0-20200707034311-ab3426394381 + golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 golang.org/x/time v0.0.0-20191024005414-555d28b269f0 gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d // indirect gopkg.in/ldap.v2 v2.5.1 diff --git a/pkg/config/client/client_config.go b/pkg/config/client/client_config.go index a247311057..e2b90ca531 100644 --- a/pkg/config/client/client_config.go +++ b/pkg/config/client/client_config.go @@ -2,14 +2,12 @@ package client import ( "io/ioutil" - "net" - "net/http" - "time" - "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "net/http" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/library-go/pkg/network" ) // GetKubeConfigOrInClusterConfig loads in-cluster config if kubeConfigFile is empty or the file if not, @@ -101,10 +99,7 @@ func (c ClientTransportOverrides) DefaultClientTransport(rt http.RoundTripper) h return rt } - transport.DialContext = (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext + transport.DialContext = network.DefaultClientDialContext() // Hold open more internal idle connections transport.MaxIdleConnsPerHost = 100 diff --git a/pkg/network/dialer.go b/pkg/network/dialer.go new file mode 100644 index 0000000000..f19be44a3e --- /dev/null +++ b/pkg/network/dialer.go @@ -0,0 +1,13 @@ +package network + +import ( + "context" + "net" +) + +type DialContext func(ctx context.Context, network, address string) (net.Conn, error) + +// DefaultDialContext returns a DialContext function from a network dialer with default options sets. +func DefaultClientDialContext() DialContext { + return dialerWithDefaultOptions() +} diff --git a/pkg/network/dialer_linux.go b/pkg/network/dialer_linux.go new file mode 100644 index 0000000000..e3cd3f4d3c --- /dev/null +++ b/pkg/network/dialer_linux.go @@ -0,0 +1,91 @@ +// +build linux + +package network + +import ( + "context" + "net" + "os" + "syscall" + "time" + + "golang.org/x/sys/unix" +) + +func dialerWithDefaultOptions() DialContext { + nd := &net.Dialer{ + // TCP_USER_TIMEOUT does affect the behaviour of connect() which is controlled by this field so we set it to the same value + Timeout: 25 * time.Second, + } + return wrapDialContext(nd.DialContext) +} + +func wrapDialContext(dc DialContext) DialContext { + return func(ctx context.Context, network, address string) (net.Conn, error) { + conn, err := dc(ctx, network, address) + if err != nil { + return conn, err + } + + if tcpCon, ok := conn.(*net.TCPConn); ok { + tcpFD, err := tcpCon.File() + if err != nil { + return conn, err + } + if err := setDefaultSocketOptions(int(tcpFD.Fd())); err != nil { + return conn, err + } + } + return conn, nil + } +} + +// setDefaultSocketOptions sets custom socket options so that we can detect connections to an unhealthy (dead) peer quickly. +// In particular we set TCP_USER_TIMEOUT that specifies the maximum amount of time that transmitted data may remain +// unacknowledged before TCP will forcibly close the connection. +// +// Note +// TCP_USER_TIMEOUT can't be too low because a single dropped packet might drop the entire connection. +// Ideally it should be set to: TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT +func setDefaultSocketOptions(fd int) error { + // specifies the maximum amount of time in milliseconds that transmitted data may remain + // unacknowledged before TCP will forcibly close the corresponding connection and return ETIMEDOUT to the application + tcpUserTimeoutInMilliSeconds := int(25 * time.Second / time.Millisecond) + + // specifies the interval at which probes are sent in seconds + tcpKeepIntvl := int(roundDuration(5*time.Second, time.Second)) + + // specifies the threshold for sending the first KEEP ALIVE probe in seconds + tcpKeepIdle := int(roundDuration(2*time.Second, time.Second)) + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, tcpUserTimeoutInMilliSeconds); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, tcpKeepIntvl); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, tcpKeepIdle); err != nil { + return wrapSyscallError("setsockopt", err) + } + return nil +} + +// roundDurationUp rounds d to the next multiple of to. +// +// note that it was copied from the std library +func roundDuration(d time.Duration, to time.Duration) time.Duration { + return (d + to - 1) / to +} + +// wrapSyscallError takes an error and a syscall name. If the error is +// a syscall.Errno, it wraps it in a os.SyscallError using the syscall name. +// +// note that it was copied from the std library +func wrapSyscallError(name string, err error) error { + if _, ok := err.(syscall.Errno); ok { + err = os.NewSyscallError(name, err) + } + return err +} diff --git a/pkg/network/dialer_others.go b/pkg/network/dialer_others.go new file mode 100644 index 0000000000..6519b0986d --- /dev/null +++ b/pkg/network/dialer_others.go @@ -0,0 +1,19 @@ +// +build !linux + +package network + +import ( + "net" + "time" + + "k8s.io/klog/v2" +) + +func dialerWithDefaultOptions() DialContext { + klog.V(2).Info("Creating the default network Dialer (unsupported platform). It may take up to 15 minutes to detect broken connections and establish a new one") + nd := &net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + } + return nd.DialContext +}