diff --git a/go.mod b/go.mod index d4c4b70c93..b80c61fb54 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1 // indirect golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 golang.org/x/net v0.0.0-20200202094626-16171245cfb2 + golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d // indirect gopkg.in/ldap.v2 v2.5.1 diff --git a/pkg/config/client/client_config.go b/pkg/config/client/client_config.go index a247311057..e2b90ca531 100644 --- a/pkg/config/client/client_config.go +++ b/pkg/config/client/client_config.go @@ -2,14 +2,12 @@ package client import ( "io/ioutil" - "net" - "net/http" - "time" - "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "net/http" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/library-go/pkg/network" ) // GetKubeConfigOrInClusterConfig loads in-cluster config if kubeConfigFile is empty or the file if not, @@ -101,10 +99,7 @@ func (c ClientTransportOverrides) DefaultClientTransport(rt http.RoundTripper) h return rt } - transport.DialContext = (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext + transport.DialContext = network.DefaultClientDialContext() // Hold open more internal idle connections transport.MaxIdleConnsPerHost = 100 diff --git a/pkg/network/dialer.go b/pkg/network/dialer.go new file mode 100644 index 0000000000..f19be44a3e --- /dev/null +++ b/pkg/network/dialer.go @@ -0,0 +1,13 @@ +package network + +import ( + "context" + "net" +) + +type DialContext func(ctx context.Context, network, address string) (net.Conn, error) + +// DefaultDialContext returns a DialContext function from a network dialer with default options sets. +func DefaultClientDialContext() DialContext { + return dialerWithDefaultOptions() +} diff --git a/pkg/network/dialer_linux.go b/pkg/network/dialer_linux.go new file mode 100644 index 0000000000..b8ff8db85e --- /dev/null +++ b/pkg/network/dialer_linux.go @@ -0,0 +1,93 @@ +// +build linux + +package network + +import ( + "net" + "os" + "syscall" + "time" + + "golang.org/x/sys/unix" + + utilerrors "k8s.io/apimachinery/pkg/util/errors" +) + +func dialerWithDefaultOptions() DialContext { + nd := &net.Dialer{ + // TCP_USER_TIMEOUT does affect the behaviour of connect() which is controlled by this field so we set it to the same value + Timeout: 25 * time.Second, + // KeepAlive must to be set to a negative value to stop std library from applying the default values + // by doing so we ensure that the options we are interested in won't be overwritten + KeepAlive: time.Duration(-1), + Control: func(network, address string, con syscall.RawConn) error { + var errs []error + err := con.Control(func(fd uintptr) { + optionsErr := setDefaultSocketOptions(int(fd)) + if optionsErr != nil { + errs = append(errs, optionsErr) + } + }) + if err != nil { + errs = append(errs, err) + } + return utilerrors.NewAggregate(errs) + }, + } + return nd.DialContext +} + +// setDefaultSocketOptions sets custom socket options so that we can detect connections to an unhealthy (dead) peer quickly. +// In particular we set TCP_USER_TIMEOUT that specifies the maximum amount of time that transmitted data may remain +// unacknowledged before TCP will forcibly close the connection. +// +// Note +// TCP_USER_TIMEOUT can't be too low because a single dropped packet might drop the entire connection. +// Ideally it should be set to: TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT +func setDefaultSocketOptions(fd int) error { + // specifies the maximum amount of time in milliseconds that transmitted data may remain + // unacknowledged before TCP will forcibly close the corresponding connection and return ETIMEDOUT to the application + tcpUserTimeoutInMilliSeconds := int(25 * time.Second / time.Millisecond) + + // specifies the interval at which probes are sent in seconds + tcpKeepIntvl := int(roundDuration(5*time.Second, time.Second)) + + // specifies the threshold for sending the first KEEP ALIVE probe in seconds + tcpKeepIdle := int(roundDuration(2*time.Second, time.Second)) + + // enable keep-alive probes + if err := syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, tcpUserTimeoutInMilliSeconds); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, tcpKeepIntvl); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, tcpKeepIdle); err != nil { + return wrapSyscallError("setsockopt", err) + } + return nil +} + +// roundDurationUp rounds d to the next multiple of to. +// +// note that it was copied from the std library +func roundDuration(d time.Duration, to time.Duration) time.Duration { + return (d + to - 1) / to +} + +// wrapSyscallError takes an error and a syscall name. If the error is +// a syscall.Errno, it wraps it in a os.SyscallError using the syscall name. +// +// note that it was copied from the std library +func wrapSyscallError(name string, err error) error { + if _, ok := err.(syscall.Errno); ok { + err = os.NewSyscallError(name, err) + } + return err +} diff --git a/pkg/network/dialer_others.go b/pkg/network/dialer_others.go new file mode 100644 index 0000000000..75f0084540 --- /dev/null +++ b/pkg/network/dialer_others.go @@ -0,0 +1,19 @@ +// +build !linux + +package network + +import ( + "net" + "time" + + "k8s.io/klog" +) + +func dialerWithDefaultOptions() DialContext { + klog.V(2).Info("Creating the default network Dialer (unsupported platform). It may take up to 15 minutes to detect broken connections and establish a new one") + nd := &net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + } + return nd.DialContext +}