From 7c42c3ea5054c420fef3d80c4a618e7540bd3f0e Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Mon, 19 Oct 2020 17:59:33 +0200 Subject: [PATCH 1/3] provides DefaultClientDialer that returns a network dialer with default options sets. --- pkg/config/client/client_config.go | 12 ++--- pkg/network/dialer.go | 10 ++++ pkg/network/dialer_linux.go | 76 ++++++++++++++++++++++++++++++ pkg/network/dialer_others.go | 18 +++++++ 4 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 pkg/network/dialer.go create mode 100644 pkg/network/dialer_linux.go create mode 100644 pkg/network/dialer_others.go diff --git a/pkg/config/client/client_config.go b/pkg/config/client/client_config.go index a247311057..6a92c0c6b7 100644 --- a/pkg/config/client/client_config.go +++ b/pkg/config/client/client_config.go @@ -2,14 +2,12 @@ package client import ( "io/ioutil" - "net" - "net/http" - "time" - "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "net/http" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/library-go/pkg/network" ) // GetKubeConfigOrInClusterConfig loads in-cluster config if kubeConfigFile is empty or the file if not, @@ -101,10 +99,8 @@ func (c ClientTransportOverrides) DefaultClientTransport(rt http.RoundTripper) h return rt } - transport.DialContext = (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext + dialer := network.DefaultClientDialer() + transport.DialContext = dialer.DialContext // Hold open more internal idle connections transport.MaxIdleConnsPerHost = 100 diff --git a/pkg/network/dialer.go b/pkg/network/dialer.go new file mode 100644 index 0000000000..2e0e93cdce --- /dev/null +++ b/pkg/network/dialer.go @@ -0,0 +1,10 @@ +package network + +import ( + "net" +) + +// DefaultClientDialer returns a network dialer with default options sets. 
+func DefaultClientDialer() *net.Dialer { + return dialerWithDefaultOptions() +} diff --git a/pkg/network/dialer_linux.go b/pkg/network/dialer_linux.go new file mode 100644 index 0000000000..aa5d5d1d30 --- /dev/null +++ b/pkg/network/dialer_linux.go @@ -0,0 +1,76 @@ +// +build linux + +package network + +import ( + "net" + "os" + "syscall" + "time" + + "golang.org/x/sys/unix" +) + +func dialerWithDefaultOptions() *net.Dialer { + return &net.Dialer{ + // TCP_USER_TIMEOUT does affect the behaviour of connect() which is controlled by this field so we set it to the same value + Timeout: 25 * time.Second, + Control: func(network, address string, con syscall.RawConn) error { + var err error + err = con.Control(func(fd uintptr) { + err = setDefaultSocketOptions(int(fd)) + }) + return err + }, + } +} + +// setDefaultSocketOptions sets custom socket options so that we can detect connections to an unhealthy (dead) peer quickly. +// In particular we set TCP_USER_TIMEOUT that specifies the maximum amount of time that transmitted data may remain +// unacknowledged before TCP will forcibly close the connection. +// +// Note +// TCP_USER_TIMEOUT can't be too low because a single dropped packet might drop the entire connection. 
+// Ideally it should be set to: TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT +func setDefaultSocketOptions(fd int) error { + // specifies the maximum amount of time in milliseconds that transmitted data may remain + // unacknowledged before TCP will forcibly close the corresponding connection and return ETIMEDOUT to the application + tcpUserTimeoutInMilliSeconds := int(25 * time.Second / time.Millisecond) + + // specifies the interval at which probes are sent in seconds + tcpKeepIntvl := int(roundDuration(5*time.Second, time.Second)) + + // specifies the threshold for sending the first KEEP ALIVE probe in seconds + tcpKeepIdle := int(roundDuration(2*time.Second, time.Second)) + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, tcpUserTimeoutInMilliSeconds); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, tcpKeepIntvl); err != nil { + return wrapSyscallError("setsockopt", err) + } + + if err := syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, tcpKeepIdle); err != nil { + return wrapSyscallError("setsockopt", err) + } + return nil +} + +// roundDuration returns d expressed in units of to, rounded up to the next whole unit. +// +// note that it was copied from the std library +func roundDuration(d time.Duration, to time.Duration) time.Duration { + return (d + to - 1) / to +} + +// wrapSyscallError takes an error and a syscall name. If the error is +// a syscall.Errno, it wraps it in a os.SyscallError using the syscall name. 
+//
+// note that it was copied from the std library
+func wrapSyscallError(name string, err error) error {
+	if _, ok := err.(syscall.Errno); ok {
+		err = os.NewSyscallError(name, err)
+	}
+	return err
+}
diff --git a/pkg/network/dialer_others.go b/pkg/network/dialer_others.go
new file mode 100644
index 0000000000..417c3ba311
--- /dev/null
+++ b/pkg/network/dialer_others.go
@@ -0,0 +1,18 @@
+// +build !linux
+
+package network
+
+import (
+	"net"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+func dialerWithDefaultOptions() *net.Dialer {
+	klog.V(2).Info("Creating the default network Dialer (unsupported platform). It may take up to 15 minutes to detect broken connections and establish a new one")
+	return &net.Dialer{
+		Timeout:   30 * time.Second,
+		KeepAlive: 30 * time.Second,
+	}
+}

From 96935677be683e35bc36fbb75e87c85fece76d90 Mon Sep 17 00:00:00 2001
From: Lukasz Szaszkiewicz
Date: Tue, 20 Oct 2020 11:51:25 +0200
Subject: [PATCH 2/3] go mod vendor

---
 go.mod | 1 +
 1 file changed, 1 insertion(+)

diff --git a/go.mod b/go.mod
index 8b76914007..63484e552c 100644
--- a/go.mod
+++ b/go.mod
@@ -50,6 +50,7 @@ require (
 	go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5
 	golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
 	golang.org/x/net v0.0.0-20200707034311-ab3426394381
+	golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4
 	golang.org/x/time v0.0.0-20191024005414-555d28b269f0
 	gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d // indirect
 	gopkg.in/ldap.v2 v2.5.1

From e8be1278bbea83f89b05eaa47c4c874b5d18b77d Mon Sep 17 00:00:00 2001
From: Lukasz Szaszkiewicz
Date: Wed, 28 Oct 2020 11:54:05 +0100
Subject: [PATCH 3/3] to DialContext

---
Review amendment: wrapDialContext now sets the socket options on the
connection's own descriptor via SyscallConn instead of TCPConn.File (which
returns a duplicated descriptor that was never closed), and closes the
connection when the options cannot be applied. The post-image blob ids on
the "index" lines of dialer.go and dialer_linux.go predate this amendment.

 pkg/config/client/client_config.go |  3 +-
 pkg/network/dialer.go              |  8 ++++--
 pkg/network/dialer_linux.go        | 57 +++++++++++++++++++++++++-----
 pkg/network/dialer_others.go       |  5 +--
 4 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/pkg/config/client/client_config.go b/pkg/config/client/client_config.go
index 6a92c0c6b7..e2b90ca531 100644
--- a/pkg/config/client/client_config.go
+++ b/pkg/config/client/client_config.go
@@ -99,8 +99,7 @@ func (c ClientTransportOverrides) DefaultClientTransport(rt http.RoundTripper) h
 		return rt
 	}
 
-	dialer := network.DefaultClientDialer()
-	transport.DialContext = dialer.DialContext
+	transport.DialContext = network.DefaultClientDialContext()
 
 	// Hold open more internal idle connections
 	transport.MaxIdleConnsPerHost = 100
diff --git a/pkg/network/dialer.go b/pkg/network/dialer.go
index 2e0e93cdce..f19be44a3e 100644
--- a/pkg/network/dialer.go
+++ b/pkg/network/dialer.go
@@ -1,10 +1,14 @@
 package network
 
 import (
+	"context"
 	"net"
 )
 
-// DefaultClientDialer returns a network dialer with default options sets.
-func DefaultClientDialer() *net.Dialer {
+// DialContext is the signature of a dial function, as implemented by net.Dialer.DialContext.
+type DialContext func(ctx context.Context, network, address string) (net.Conn, error)
+
+// DefaultClientDialContext returns a DialContext function from a network dialer with the default options set.
+func DefaultClientDialContext() DialContext {
 	return dialerWithDefaultOptions()
 }
diff --git a/pkg/network/dialer_linux.go b/pkg/network/dialer_linux.go
index aa5d5d1d30..e3cd3f4d3c 100644
--- a/pkg/network/dialer_linux.go
+++ b/pkg/network/dialer_linux.go
@@ -3,6 +3,7 @@
 package network
 
 import (
+	"context"
 	"net"
 	"os"
 	"syscall"
@@ -11,17 +12,55 @@ import (
 	"golang.org/x/sys/unix"
 )
 
-func dialerWithDefaultOptions() *net.Dialer {
-	return &net.Dialer{
+func dialerWithDefaultOptions() DialContext {
+	nd := &net.Dialer{
 		// TCP_USER_TIMEOUT does affect the behaviour of connect() which is controlled by this field so we set it to the same value
 		Timeout: 25 * time.Second,
-		Control: func(network, address string, con syscall.RawConn) error {
-			var err error
-			err = con.Control(func(fd uintptr) {
-				err = setDefaultSocketOptions(int(fd))
-			})
-			return err
-		},
+	}
+	return wrapDialContext(nd.DialContext)
+}
+
+// applyDefaultSocketOptions sets the default socket options on the descriptor owned by tcpCon.
+//
+// It goes through SyscallConn instead of TCPConn.File: File returns a duplicate of the
+// descriptor that has to be closed by the caller, which would leak one descriptor per dial.
+func applyDefaultSocketOptions(tcpCon *net.TCPConn) error {
+	rawCon, err := tcpCon.SyscallConn()
+	if err != nil {
+		return err
+	}
+
+	var optErr error
+	if err := rawCon.Control(func(fd uintptr) {
+		optErr = setDefaultSocketOptions(int(fd))
+	}); err != nil {
+		return err
+	}
+	return optErr
+}
+
+// wrapDialContext returns a DialContext that dials with dc and, when the result is a
+// *net.TCPConn, applies the default socket options to it before handing it out.
+//
+// When the options cannot be applied the connection is closed and the error is returned
+// together with a nil net.Conn, so a caller that drops the result on error does not leak it.
+func wrapDialContext(dc DialContext) DialContext {
+	return func(ctx context.Context, network, address string) (net.Conn, error) {
+		conn, err := dc(ctx, network, address)
+		if err != nil {
+			return conn, err
+		}
+
+		tcpCon, ok := conn.(*net.TCPConn)
+		if !ok {
+			return conn, nil
+		}
+		if err := applyDefaultSocketOptions(tcpCon); err != nil {
+			// best-effort close, the error from setting the options is the one worth reporting
+			_ = conn.Close()
+			return nil, err
+		}
+		return conn, nil
 	}
 }
 
diff --git a/pkg/network/dialer_others.go b/pkg/network/dialer_others.go
index 417c3ba311..6519b0986d 100644
--- a/pkg/network/dialer_others.go
+++ b/pkg/network/dialer_others.go
@@ -9,10 +9,11 @@ import (
 	"k8s.io/klog/v2"
 )
 
-func dialerWithDefaultOptions() *net.Dialer {
+func dialerWithDefaultOptions() DialContext {
 	klog.V(2).Info("Creating the default network Dialer (unsupported platform). It may take up to 15 minutes to detect broken connections and establish a new one")
-	return &net.Dialer{
+	nd := &net.Dialer{
 		Timeout:   30 * time.Second,
 		KeepAlive: 30 * time.Second,
 	}
+	return nd.DialContext
 }