From 2a4b9c7a6587839963429fafaf1409de0cd19975 Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 9 Dec 2020 13:10:40 +0100 Subject: [PATCH 1/5] UPSTREAM: 95002: stops puting a stacktrace of aborted requests in the logs --- .../cmd/controller-manager/app/serve.go | 2 +- .../cmd/kube-scheduler/app/server.go | 2 +- .../kubeapiserver/server/insecure_handler.go | 5 +- .../pkg/endpoints/metrics/metrics.go | 27 +++ .../src/k8s.io/apiserver/pkg/server/config.go | 2 +- .../k8s.io/apiserver/pkg/server/filters/BUILD | 1 + .../apiserver/pkg/server/filters/timeout.go | 22 +- .../pkg/server/filters/timeout_test.go | 218 ++++++++++++++++++ .../apiserver/pkg/server/filters/wrap.go | 26 ++- 9 files changed, 284 insertions(+), 21 deletions(-) diff --git a/vendor/k8s.io/kubernetes/cmd/controller-manager/app/serve.go b/vendor/k8s.io/kubernetes/cmd/controller-manager/app/serve.go index 15c3ba031b8b..48d7ab0aea6b 100644 --- a/vendor/k8s.io/kubernetes/cmd/controller-manager/app/serve.go +++ b/vendor/k8s.io/kubernetes/cmd/controller-manager/app/serve.go @@ -48,7 +48,7 @@ func BuildHandlerChain(apiHandler http.Handler, authorizationInfo *apiserver.Aut } handler = genericapifilters.WithRequestInfo(handler, requestInfoResolver) handler = genericapifilters.WithCacheControl(handler) - handler = genericfilters.WithPanicRecovery(handler) + handler = genericfilters.WithPanicRecovery(handler, requestInfoResolver) return handler } diff --git a/vendor/k8s.io/kubernetes/cmd/kube-scheduler/app/server.go b/vendor/k8s.io/kubernetes/cmd/kube-scheduler/app/server.go index 25305ef1a379..9f43ed4940c1 100644 --- a/vendor/k8s.io/kubernetes/cmd/kube-scheduler/app/server.go +++ b/vendor/k8s.io/kubernetes/cmd/kube-scheduler/app/server.go @@ -283,7 +283,7 @@ func buildHandlerChain(handler http.Handler, authn authenticator.Request, authz handler = genericapifilters.WithAuthentication(handler, authn, failedHandler, nil) handler = genericapifilters.WithRequestInfo(handler, requestInfoResolver) handler = genericapifilters.WithCacheControl(handler) - handler = genericfilters.WithPanicRecovery(handler) + handler = genericfilters.WithPanicRecovery(handler, requestInfoResolver) return handler } diff --git a/vendor/k8s.io/kubernetes/pkg/kubeapiserver/server/insecure_handler.go b/vendor/k8s.io/kubernetes/pkg/kubeapiserver/server/insecure_handler.go index c3b87bc9edb0..b66a61d85132 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubeapiserver/server/insecure_handler.go +++ b/vendor/k8s.io/kubernetes/pkg/kubeapiserver/server/insecure_handler.go @@ -26,6 +26,7 @@ import ( // BuildInsecureHandlerChain sets up the server to listen to http. Should be removed. func BuildInsecureHandlerChain(apiHandler http.Handler, c *server.Config) http.Handler { + requestInfoResolver := server.NewRequestInfoResolver(c) handler := apiHandler // Temporarily disable APIPriorityAndFairness during development // so that /debug/pprof works even while this feature is totally @@ -40,9 +41,9 @@ func BuildInsecureHandlerChain(apiHandler http.Handler, c *server.Config) http.H handler = genericfilters.WithCORS(handler, c.CorsAllowedOriginList, nil, nil, nil, "true") handler = genericfilters.WithTimeoutForNonLongRunningRequests(handler, c.LongRunningFunc, c.RequestTimeout) handler = genericfilters.WithWaitGroup(handler, c.LongRunningFunc, c.HandlerChainWaitGroup) - handler = genericapifilters.WithRequestInfo(handler, server.NewRequestInfoResolver(c)) + handler = genericapifilters.WithRequestInfo(handler, requestInfoResolver) handler = genericapifilters.WithCacheControl(handler) - handler = genericfilters.WithPanicRecovery(handler) + handler = genericfilters.WithPanicRecovery(handler, requestInfoResolver) return handler } diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go index c79efdef4e3f..e593dc6bba8e 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go @@ -158,6 +158,16 @@ var ( }, []string{"verb", "group", "version", "resource", "subresource", "scope", "component", "code"}, ) + // requestAbortsTotal is a number of aborted requests with http.ErrAbortHandler + requestAbortsTotal = compbasemetrics.NewCounterVec( + &compbasemetrics.CounterOpts{ + Name: "apiserver_request_aborts_total", + Help: "Number of requests which apiserver aborted possibly due to a timeout, for each group, version, verb, resource, subresource and scope", + StabilityLevel: compbasemetrics.ALPHA, + }, + []string{"verb", "group", "version", "resource", "subresource", "scope"}, + ) + kubectlExeRegexp = regexp.MustCompile(`^.*((?i:kubectl\.exe))`) metrics = []resettableCollector{ @@ -171,6 +181,7 @@ var ( WatchEventsSizes, currentInflightRequests, requestTerminationsTotal, + requestAbortsTotal, } ) @@ -204,6 +215,22 @@ func UpdateInflightRequestMetrics(nonmutating, mutating int) { currentInflightRequests.WithLabelValues(MutatingKind).Set(float64(mutating)) } +// RecordRequestAbort records that the request was aborted possibly due to a timeout. +func RecordRequestAbort(req *http.Request, requestInfo *request.RequestInfo) { + if requestInfo == nil { + requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} + } + + scope := CleanScope(requestInfo) + reportedVerb := cleanVerb(canonicalVerb(strings.ToUpper(req.Method), scope), req) + resource := requestInfo.Resource + subresource := requestInfo.Subresource + group := requestInfo.APIGroup + version := requestInfo.APIVersion + + requestAbortsTotal.WithLabelValues(reportedVerb, group, version, resource, subresource, scope).Inc() +} + // RecordRequestTermination records that the request was terminated early as part of a resource // preservation or apiserver self-defense mechanism (e.g. timeouts, maxinflight throttling, // proxyHandler errors). RecordRequestTermination should only be called zero or one times diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/config.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/config.go index 01bdf398e424..e396cd6107df 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/config.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/config.go @@ -756,7 +756,7 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler { handler = genericfilters.WithProbabilisticGoaway(handler, c.GoawayChance) } handler = genericapifilters.WithCacheControl(handler) - handler = genericfilters.WithPanicRecovery(handler) + handler = genericfilters.WithPanicRecovery(handler, c.RequestInfoResolver) return handler } diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/BUILD b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/BUILD index 69c80d3639cd..a36f13cfe58a 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/BUILD +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/BUILD @@ -27,6 +27,7 @@ go_test( "//staging/src/k8s.io/apiserver/pkg/endpoints/filters:go_default_library", "//staging/src/k8s.io/apiserver/pkg/endpoints/request:go_default_library", "//vendor/golang.org/x/net/http2:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", ], ) diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go index 9b8d6d4b138d..2405bfd1ff92 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout.go @@ -33,8 +33,6 @@ import ( apirequest "k8s.io/apiserver/pkg/endpoints/request" ) -var errConnKilled = fmt.Errorf("killing connection/stream because serving request timed out and response had been started") - // WithTimeoutForNonLongRunningRequests times out non-long-running requests after the time given by timeout. func WithTimeoutForNonLongRunningRequests(handler http.Handler, longRunning apirequest.LongRunningRequestCheck, timeout time.Duration) http.Handler { if longRunning == nil { @@ -246,15 +244,17 @@ func (tw *baseTimeoutWriter) timeout(err *apierrors.StatusError) { // no way to timeout the HTTP request at the point. We have to shutdown // the connection for HTTP1 or reset stream for HTTP2. // - // Note from: Brad Fitzpatrick - // if the ServeHTTP goroutine panics, that will do the best possible thing for both - // HTTP/1 and HTTP/2. In HTTP/1, assuming you're replying with at least HTTP/1.1 and - // you've already flushed the headers so it's using HTTP chunking, it'll kill the TCP - // connection immediately without a proper 0-byte EOF chunk, so the peer will recognize - // the response as bogus. In HTTP/2 the server will just RST_STREAM the stream, leaving - // the TCP connection open, but resetting the stream to the peer so it'll have an error, - // like the HTTP/1 case. - panic(errConnKilled) + // Note from the golang's docs: + // If ServeHTTP panics, the server (the caller of ServeHTTP) assumes + // that the effect of the panic was isolated to the active request. + // It recovers the panic, logs a stack trace to the server error log, + // and either closes the network connection or sends an HTTP/2 + // RST_STREAM, depending on the HTTP protocol. To abort a handler so + // the client sees an interrupted response but the server doesn't log + // an error, panic with the value ErrAbortHandler. + // + // We are throwing http.ErrAbortHandler deliberately so that a client is notified and to suppress a not helpful stacktrace in the logs + panic(http.ErrAbortHandler) } } diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go index 4ad8717c0188..2501705729ea 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go @@ -17,22 +17,34 @@ limitations under the License. package filters import ( + "bytes" + "crypto/tls" + "crypto/x509" "encoding/json" "fmt" + "io" "io/ioutil" + "net" "net/http" "net/http/httptest" + "net/http/httptrace" + "os" "reflect" "strings" "sync" "testing" "time" + "golang.org/x/net/http2" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apiserver/pkg/endpoints/request" + "k8s.io/klog" ) type recorder struct { @@ -175,3 +187,209 @@ func TestTimeout(t *testing.T) { t.Fatalf("expected to see a handler panic, but didn't") } } + +func captureStdErr() (func() string, func(), error) { + var buf bytes.Buffer + reader, writer, err := os.Pipe() + if err != nil { + return nil, nil, err + } + stderr := os.Stderr + readerClosedCh := make(chan struct{}) + stopReadingStdErr := func() string { + writer.Close() + <-readerClosedCh + return buf.String() + } + klog.LogToStderr(true) + cleanUp := func() { + os.Stderr = stderr + klog.LogToStderr(false) + stopReadingStdErr() + } + os.Stderr = writer + go func() { + io.Copy(&buf, reader) + readerClosedCh <- struct{}{} + close(readerClosedCh) + }() + klog.LogToStderr(true) + + return stopReadingStdErr, cleanUp, nil +} + +func TestErrConnKilled(t *testing.T) { + readStdErr, cleanUp, err := captureStdErr() + if err != nil { + t.Fatalf("unable to setup the test, err %v", err) + } + defer cleanUp() + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // this error must be ignored by the WithPanicRecovery handler + // it is thrown by WithTimeoutForNonLongRunningRequests handler when a response has been already sent to the client and the handler timed out + // panicking with http.ErrAbortHandler also suppresses logging of a stack trace to the server's error log and closes the underlying connection + w.Write([]byte("hello from the handler")) + panic(http.ErrAbortHandler) + }) + resolver := &request.RequestInfoFactory{ + APIPrefixes: sets.NewString("api", "apis"), + GrouplessAPIPrefixes: sets.NewString("api"), + } + + ts := httptest.NewServer(WithPanicRecovery(handler, nil, resolver)) + defer ts.Close() + + _, err = http.Get(ts.URL) + if err == nil { + t.Fatal("expected to receive an error") + } + + capturedOutput := readStdErr() + if len(capturedOutput) > 0 { + t.Errorf("unexpected output captured actual = %v", capturedOutput) + } +} + +type panicOnNonReuseTransport struct { + Transport http.RoundTripper + gotConnSeen bool +} + +func (t *panicOnNonReuseTransport) RoundTrip(req *http.Request) (*http.Response, error) { + return t.Transport.RoundTrip(req) +} + +func (t *panicOnNonReuseTransport) GotConn(info httptrace.GotConnInfo) { + if !t.gotConnSeen { + t.gotConnSeen = true + return + } + if !info.Reused { + panic(fmt.Sprintf("expected the connection to be reused, info %#v", info)) + } +} + +// TestErrConnKilledHTTP2 check if HTTP/2 connection is not closed when an HTTP handler panics +// The net/http library recovers the panic and sends an HTTP/2 RST_STREAM. +func TestErrConnKilledHTTP2(t *testing.T) { + readStdErr, cleanUp, err := captureStdErr() + if err != nil { + t.Fatalf("unable to setup the test, err %v", err) + } + defer cleanUp() + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // this error must be ignored by the WithPanicRecovery handler + // it is thrown by WithTimeoutForNonLongRunningRequests handler when a response has been already sent to the client and the handler timed out + // panicking with http.ErrAbortHandler also suppresses logging of a stack trace to the server's error log and closes the underlying connection + w.Write([]byte("hello from the handler")) + panic(http.ErrAbortHandler) + }) + resolver := &request.RequestInfoFactory{ + APIPrefixes: sets.NewString("api", "apis"), + GrouplessAPIPrefixes: sets.NewString("api"), + } + + // test server + ts := httptest.NewUnstartedServer(WithPanicRecovery(handler, nil, resolver)) + tsCert, err := tls.X509KeyPair(tsCrt, tsKey) + if err != nil { + t.Fatalf("backend: invalid x509/key pair: %v", err) + } + ts.TLS = &tls.Config{ + Certificates: []tls.Certificate{tsCert}, + NextProtos: []string{http2.NextProtoTLS}, + } + ts.StartTLS() + defer ts.Close() + + newServerRequest := func(tr *panicOnNonReuseTransport) *http.Request { + req, _ := http.NewRequest("GET", fmt.Sprintf("https://127.0.0.1:%d", ts.Listener.Addr().(*net.TCPAddr).Port), nil) + trace := &httptrace.ClientTrace{ + GotConn: tr.GotConn, + } + return req.WithContext(httptrace.WithClientTrace(req.Context(), trace)) + } + + // client + clientCACertPool := x509.NewCertPool() + clientCACertPool.AppendCertsFromPEM(tsCrt) + clientTLSConfig := &tls.Config{ + RootCAs: clientCACertPool, + NextProtos: []string{http2.NextProtoTLS}, + } + tr := &panicOnNonReuseTransport{} + client := &http.Client{} + tr.Transport = &http2.Transport{ + TLSClientConfig: clientTLSConfig, + } + client.Transport = tr + + // act + _, err = client.Do(newServerRequest(tr)) + if err == nil { + t.Fatal("expected to receive an error") + } + + capturedOutput := readStdErr() + if len(capturedOutput) > 0 { + t.Errorf("unexpected output captured actual = %v", capturedOutput) + } + + // make another req to the server + // the connection should be reused + // the client uses a custom transport that checks and panics when the con wasn't reused. + _, err = client.Do(newServerRequest(tr)) + if err == nil { + t.Fatal("expected to receive an error") + } +} + +var tsCrt = []byte(`-----BEGIN CERTIFICATE----- +MIIDTjCCAjagAwIBAgIJAJdcQEBN2CjoMA0GCSqGSIb3DQEBCwUAMFAxCzAJBgNV +BAYTAlBMMQ8wDQYDVQQIDAZQb2xhbmQxDzANBgNVBAcMBkdkYW5zazELMAkGA1UE +CgwCU0sxEjAQBgNVBAMMCWxvY2FsaG9zdDAeFw0yMDA5MjgxMTU1MjhaFw0zMDA5 +MjYxMTU1MjhaMFAxCzAJBgNVBAYTAlBMMQ8wDQYDVQQIDAZQb2xhbmQxDzANBgNV +BAcMBkdkYW5zazELMAkGA1UECgwCU0sxEjAQBgNVBAMMCWxvY2FsaG9zdDCCASIw +DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMr6b/uTHkIDEd88x3t3jnroOVwh +jWMwZ6qXN2NV/If1L9FNvtoZzZi6yCDE1uLdD1kWZ0R2XOPEwUPn+Z8A/lg9kF8J +GloLCF8q+XeYp8aWRKzwtdi+MPaKFf0wsuxEEHU4pypFrszNY0yLRbWAbMtgBFy0 +KhyNGahFO9V69cRHUj6EJ9kSBg0nG5bsypon2rinzKpUrzAEl2MbM3F34Zit5yOv +rYQcbDME+9XmOJPD97XBvMZCbmPnmpst3tX7ZhdKgSKtIjoYt+d//wtPMXOhrRzM +xcc6HuIHAovtB4kvZl5wvVU8ra8DKZviYyjfW36kQHo+yFwP3XXZFWezZi0CAwEA +AaMrMCkwCQYDVR0TBAIwADALBgNVHQ8EBAMCBaAwDwYDVR0RBAgwBocEfwAAATAN +BgkqhkiG9w0BAQsFAAOCAQEAMoAlhZ6yRwPmFL2ql9ZYWqaPu2NC4dXYV6kUIXUA +pG3IqFWb3L4ePkgYBMDlJGpAJcXlSSgEGiLj+3qQojawIMzzxmqr2KX888S5Mr+9 +I1qseRwcftwYqV/ewJSWE90HJ21pb1ixA6bSRJLV7DyxO6zKsdVJ4xIvehZtGbux +0RTf+8zUx8z2Goy1GUztOIqfMRt1P1hlQG0uvYsGQM84HO4+YhFwejrGaj8ajpgF +uo3B8BVHeh57FNGE6C45NkFGHq3tkNLMdAa32Az8DDvPmsJuycf6vgIfBEQxLZSF +OUKrKmtfdFv4XrInqFUYBYp5GkL8SGM2wmv6aSw9Aju4lA== +-----END CERTIFICATE-----`) + +var tsKey = []byte(`-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDK+m/7kx5CAxHf +PMd7d4566DlcIY1jMGeqlzdjVfyH9S/RTb7aGc2YusggxNbi3Q9ZFmdEdlzjxMFD +5/mfAP5YPZBfCRpaCwhfKvl3mKfGlkSs8LXYvjD2ihX9MLLsRBB1OKcqRa7MzWNM +i0W1gGzLYARctCocjRmoRTvVevXER1I+hCfZEgYNJxuW7MqaJ9q4p8yqVK8wBJdj +GzNxd+GYrecjr62EHGwzBPvV5jiTw/e1wbzGQm5j55qbLd7V+2YXSoEirSI6GLfn +f/8LTzFzoa0czMXHOh7iBwKL7QeJL2ZecL1VPK2vAymb4mMo31t+pEB6PshcD911 +2RVns2YtAgMBAAECggEAA2Qx0MtBeyrf9pHmZ1q1B7qvkqmA2kJpyQDjzQYXxRHE +rcOVx8EcnUupolqHmJzG798e9JbhsHCOJhtPIWf71++XZO8bAJwklKp8JpJnYzsJ +hLY0450x5jyiZ2uT4by1Za//owYtCID6AsJk9MZjivZcvEvKVFXLMvONL2DxkEj1 +KaGQJh6/GT4jtNX07bW9+5w069KAAf+BNuqv8+Y/FseV3ovlpLTKjMV9xCCp9i62 +PJs/hs5CW2X+JCE7OCLsAiu0JTpXYyHcLwYwnCONdvj6veiMWjRyNDr3ew5NeZNf +nGU4WX7mXjPd/1OvzJy6iyrBlAA63ZfFZYjWQnfsIQKBgQDmo3AMIr+9rE79NnaD +woQyO539YSO45KSM39/Xrp/NJVpOxtzgZrYo7O6f6kQ3S5zQOddy9Oj7gN3RXhZ7 +Vi+Oja78ig7KUrqxcBiBGRsKZGm5CGdZ0EFd3rIEh4Qb+f+2c4f+6NWANb4kwvfq +K24c1o71+77lEVlzE2/L33K+mQKBgQDhTFr/f2e9gnRNX9bjF4p7DQI0RsFADjx0 +jgJYHfm/lCIdH9vf6SmmvJv2E76Bqx9XVilhav/egqKO/wzJWHyNo2RFBXNqfwoF +UxRZKgqhcU52y2LKAYoTYfodktatZk74rinMDLmA6arnlAWQELk3Mx48DlND43Zc +DUHTKcJEtQKBgQDYdL1c9mPjnEqJxMqXwEAXcPJG8hr3lMaGXDoVjxL1EsBdvK9h +f6QoZq1RsiiRiMpEdnSotAfQutHzhA0vdeSuMnTvGJbm9Zu3mc+1oZ1KNJEwkh2F +Ijmm4rFKJPEs3IVMc8NHzrdJW6b3k2/e+yGduRR08e7nx0+e+7fpq+1hyQKBgHY9 +l4h9+hkYjSdKhEG8yh3Ybu62r5eJoSremNZcLQXhnaHBZaj2+rgaRpP4OsRc5d71 +RlRtTood72iy7KgDO6MuPGKJANDEiaLPvl8pVFj0WWS5S0iPVELl6dl5hheNGSck +aKVBjF3exKYzJlQ8oqgYuOZ18jcv+p9HCePkB6P9AoGBAJSYpkNDc/lnCpfIlxVw +n+VroX6QDIMZzC7BGiUSrmVsu6xEbI+8/C7ecN2oCZZLMj96EXe6j+np4zmkQezc +c1EwB7fNAiS0fWyE2RU6QAOZJ71bDpzQa4q4DxbOkYSybGPM/nqDRwovdjUnWeuM ++vrJUjAZAPHJcvos0iylnc8E +-----END PRIVATE KEY-----`) diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go index 96bebdbd6006..65cbff9d1e20 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go @@ -23,16 +23,32 @@ import ( "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apiserver/pkg/server/httplog" + "k8s.io/apiserver/pkg/endpoints/request" + "k8s.io/apiserver/pkg/endpoints/metrics" ) // WithPanicRecovery wraps an http Handler to recover and log panics (except in the special case of http.ErrAbortHandler panics, which suppress logging). -func WithPanicRecovery(handler http.Handler) http.Handler { +func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolver) http.Handler { return withPanicRecovery(handler, func(w http.ResponseWriter, req *http.Request, err interface{}) { if err == http.ErrAbortHandler { - // honor the http.ErrAbortHandler sentinel panic value: - // ErrAbortHandler is a sentinel panic value to abort a handler. - // While any panic from ServeHTTP aborts the response to the client, - // panicking with ErrAbortHandler also suppresses logging of a stack trace to the server's error log. + // Honor the http.ErrAbortHandler sentinel panic value + // + // If ServeHTTP panics, the server (the caller of ServeHTTP) assumes + // that the effect of the panic was isolated to the active request. + // It recovers the panic, logs a stack trace to the server error log, + // and either closes the network connection or sends an HTTP/2 + // RST_STREAM, depending on the HTTP protocol. To abort a handler so + // the client sees an interrupted response but the server doesn't log + // an error, panic with the value ErrAbortHandler. + // + // Note that the ReallyCrash variable controls the behaviour of the HandleCrash function + // So it might actually crash, after calling the handlers + info, err := resolver.NewRequestInfo(req) + if err != nil { + metrics.RecordRequestAbort(req, nil) + return + } + metrics.RecordRequestAbort(req, info) return } http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError) From 7a0d2781e1c8e4b2472dff4ea70c124e0549d0bd Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 9 Dec 2020 13:13:53 +0100 Subject: [PATCH 2/5] UPSTREAM: 96424: put a message, not a stack, in the log on a timeout --- .../apiserver/pkg/server/filters/timeout_test.go | 12 ++++++++++-- .../src/k8s.io/apiserver/pkg/server/filters/wrap.go | 13 ++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go index 2501705729ea..7c36d32dc9e3 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go @@ -244,8 +244,12 @@ func TestErrConnKilled(t *testing.T) { t.Fatal("expected to receive an error") } + // we should only get one line for this, not the big stack from before capturedOutput := readStdErr() - if len(capturedOutput) > 0 { + if strings.Count(capturedOutput, "\n") != 1 { + t.Errorf("unexpected output captured actual = %v", capturedOutput) + } + if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } } @@ -330,8 +334,12 @@ func TestErrConnKilledHTTP2(t *testing.T) { t.Fatal("expected to receive an error") } + // we should only get one line for this, not the big stack from before capturedOutput := readStdErr() - if len(capturedOutput) > 0 { + if strings.Count(capturedOutput, "\n") != 1 { + t.Errorf("unexpected output captured actual = %v", capturedOutput) + } + if !strings.Contains(capturedOutput, `timeout or abort while handling: GET "/"`) { t.Errorf("unexpected output captured actual = %v", capturedOutput) } diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go index 65cbff9d1e20..abb24242c848 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/wrap.go @@ -17,10 +17,10 @@ limitations under the License. package filters import ( + "fmt" "net/http" "k8s.io/klog" - "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apiserver/pkg/server/httplog" "k8s.io/apiserver/pkg/endpoints/request" @@ -43,12 +43,15 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve // // Note that the ReallyCrash variable controls the behaviour of the HandleCrash function // So it might actually crash, after calling the handlers - info, err := resolver.NewRequestInfo(req) - if err != nil { + if info, err := resolver.NewRequestInfo(req); err != nil { metrics.RecordRequestAbort(req, nil) - return + } else { + metrics.RecordRequestAbort(req, info) } - metrics.RecordRequestAbort(req, info) + // This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated + // in case the rate limit delays it. If you outrun the rate for this one timed out requests, something has gone + // seriously wrong with your server, but generally having a logging signal for timeouts is useful. + runtime.HandleError(fmt.Errorf("timeout or abort while handling: %v %q", req.Method, req.URL.Path)) return } http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError) From 21bf0841f37a17b358b802a90de5c404c556afbc Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 9 Dec 2020 13:15:34 +0100 Subject: [PATCH 3/5] UPSTREAM: 95725: DelegatingAuthorizationOptions: exposes and sets a default timeout for SubjectAccessReview client --- .../app/options/options_test.go | 2 ++ .../app/options/options_test.go | 1 + .../apiserver/pkg/server/options/authorization.go | 11 +++++++++++ 3 files changed, 14 insertions(+) diff --git a/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go b/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go index 272cfca5d010..00815103f21f 100644 --- a/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go +++ b/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go @@ -115,6 +115,7 @@ func TestDefaultFlags(t *testing.T) { Authorization: &apiserveroptions.DelegatingAuthorizationOptions{ AllowCacheTTL: 10 * time.Second, DenyCacheTTL: 10 * time.Second, + ClientTimeout: 10 * time.Second, RemoteKubeConfigFileOptional: true, AlwaysAllowPaths: []string{"/healthz"}, // note: this does not match /healthz/ or }, @@ -247,6 +248,7 @@ func TestAddFlags(t *testing.T) { Authorization: &apiserveroptions.DelegatingAuthorizationOptions{ AllowCacheTTL: 10 * time.Second, DenyCacheTTL: 10 * time.Second, + ClientTimeout: 10 * time.Second, RemoteKubeConfigFileOptional: true, AlwaysAllowPaths: []string{"/healthz"}, // note: this does not match /healthz/ or }, diff --git a/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go b/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go index dc43ee87e65d..c25441b5b8ba 100644 --- a/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go +++ b/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go @@ -376,6 +376,7 @@ func TestAddFlags(t *testing.T) { Authorization: &apiserveroptions.DelegatingAuthorizationOptions{ AllowCacheTTL: 10 * time.Second, DenyCacheTTL: 10 * time.Second, + ClientTimeout: 10 * time.Second, RemoteKubeConfigFileOptional: true, AlwaysAllowPaths: []string{"/healthz"}, // note: this does not match /healthz/ or /healthz/* }, diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authorization.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authorization.go index 90d53c9aec50..263bc14129e5 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authorization.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authorization.go @@ -59,6 +59,10 @@ type DelegatingAuthorizationOptions struct { // AlwaysAllowGroups are groups which are allowed to take any actions. In kube, this is system:masters. AlwaysAllowGroups []string + + // ClientTimeout specifies a time limit for requests made by SubjectAccessReviews client. + // The default value is set to 10 seconds. + ClientTimeout time.Duration } func NewDelegatingAuthorizationOptions() *DelegatingAuthorizationOptions { @@ -66,6 +70,7 @@ func NewDelegatingAuthorizationOptions() *DelegatingAuthorizationOptions { // very low for responsiveness, but high enough to handle storms AllowCacheTTL: 10 * time.Second, DenyCacheTTL: 10 * time.Second, + ClientTimeout: 10 * time.Second, } } @@ -81,6 +86,11 @@ func (s *DelegatingAuthorizationOptions) WithAlwaysAllowPaths(paths ...string) * return s } +// WithClientTimeout sets the given timeout for SAR client used by this authorizer +func (s *DelegatingAuthorizationOptions) WithClientTimeout(timeout time.Duration) { + s.ClientTimeout = timeout +} + func (s *DelegatingAuthorizationOptions) Validate() []error { allErrors := []error{} return allErrors @@ -186,6 +196,7 @@ func (s *DelegatingAuthorizationOptions) getClient() (kubernetes.Interface, erro // set high qps/burst limits since this will effectively limit API server responsiveness clientConfig.QPS = 200 clientConfig.Burst = 400 + clientConfig.Timeout = s.ClientTimeout // make the client use protobuf protoConfig := rest.CopyConfig(clientConfig) From 454ee45770b8e5d5cda554387905da14f2dde6d8 Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 9 Dec 2020 13:17:23 +0100 Subject: [PATCH 4/5] UPSTREAM: 96217: DelegatingAuthenticationOptions TokenReview client timeout --- .../app/options/options_test.go | 2 ++ .../app/options/options_test.go | 5 +++-- .../apiserver/pkg/server/options/authentication.go | 11 +++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go b/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go index 00815103f21f..aff167f43619 100644 --- a/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go +++ b/vendor/k8s.io/kubernetes/cmd/cloud-controller-manager/app/options/options_test.go @@ -111,6 +111,7 @@ func TestDefaultFlags(t *testing.T) { ExtraHeaderPrefixes: []string{"x-remote-extra-"}, }, RemoteKubeConfigFileOptional: true, + ClientTimeout: 10 * time.Second, }, Authorization: &apiserveroptions.DelegatingAuthorizationOptions{ AllowCacheTTL: 10 * time.Second, @@ -244,6 +245,7 @@ func TestAddFlags(t *testing.T) { ExtraHeaderPrefixes: []string{"x-remote-extra-"}, }, RemoteKubeConfigFileOptional: true, + ClientTimeout: 10 * time.Second, }, Authorization: &apiserveroptions.DelegatingAuthorizationOptions{ AllowCacheTTL: 10 * time.Second, diff --git a/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go b/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go index c25441b5b8ba..54736fe4e7fd 100644 --- a/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go +++ b/vendor/k8s.io/kubernetes/cmd/kube-controller-manager/app/options/options_test.go @@ -364,8 +364,9 @@ func TestAddFlags(t *testing.T) { BindNetwork: "tcp", }).WithLoopback(), Authentication: &apiserveroptions.DelegatingAuthenticationOptions{ - CacheTTL: 10 * time.Second, - ClientCert: apiserveroptions.ClientCertAuthenticationOptions{}, + CacheTTL: 10 * time.Second, + ClientTimeout: 10 * time.Second, + ClientCert: apiserveroptions.ClientCertAuthenticationOptions{}, RequestHeader: apiserveroptions.RequestHeaderAuthenticationOptions{ UsernameHeaders: []string{"x-remote-user"}, GroupHeaders: []string{"x-remote-group"}, diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authentication.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authentication.go index 9a395c94a739..0d4a2dc28eee 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authentication.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/options/authentication.go @@ -180,6 +180,10 @@ type DelegatingAuthenticationOptions struct { // TolerateInClusterLookupFailure indicates failures to look up authentication configuration from the cluster configmap should not be fatal. // Setting this can result in an authenticator that will reject all requests. TolerateInClusterLookupFailure bool + + // ClientTimeout specifies a time limit for requests made by the authorization webhook client. + // The default value is set to 10 seconds. + ClientTimeout time.Duration } func NewDelegatingAuthenticationOptions() *DelegatingAuthenticationOptions { @@ -192,9 +196,15 @@ func NewDelegatingAuthenticationOptions() *DelegatingAuthenticationOptions { GroupHeaders: []string{"x-remote-group"}, ExtraHeaderPrefixes: []string{"x-remote-extra-"}, }, + ClientTimeout: 10 * time.Second, } } +// WithClientTimeout sets the given timeout for the authentication webhook client. +func (s *DelegatingAuthenticationOptions) WithClientTimeout(timeout time.Duration) { + s.ClientTimeout = timeout +} + func (s *DelegatingAuthenticationOptions) Validate() []error { allErrors := []error{} allErrors = append(allErrors, s.RequestHeader.Validate()...) @@ -419,6 +429,7 @@ func (s *DelegatingAuthenticationOptions) getClient() (kubernetes.Interface, err // set high qps/burst limits since this will effectively limit API server responsiveness clientConfig.QPS = 200 clientConfig.Burst = 400 + clientConfig.Timeout = s.ClientTimeout return kubernetes.NewForConfig(clientConfig) } From b2776b2f5b9ba107362c7249bdc297e1d59022b8 Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Wed, 9 Dec 2020 16:29:23 +0100 Subject: [PATCH 5/5] UPSTREAM: 95002: klog v1 prints to stderr --- .../k8s.io/apiserver/pkg/server/filters/timeout_test.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go index 7c36d32dc9e3..915bc2c55336 100644 --- a/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go +++ b/vendor/k8s.io/kubernetes/staging/src/k8s.io/apiserver/pkg/server/filters/timeout_test.go @@ -44,7 +44,6 @@ import ( "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apiserver/pkg/endpoints/request" - "k8s.io/klog" ) type recorder struct { @@ -201,10 +200,8 @@ func captureStdErr() (func() string, func(), error) { <-readerClosedCh return buf.String() } - klog.LogToStderr(true) cleanUp := func() { os.Stderr = stderr - klog.LogToStderr(false) stopReadingStdErr() } os.Stderr = writer @@ -213,7 +210,6 @@ func captureStdErr() (func() string, func(), error) { readerClosedCh <- struct{}{} close(readerClosedCh) }() - klog.LogToStderr(true) return stopReadingStdErr, cleanUp, nil } @@ -236,7 +232,7 @@ func TestErrConnKilled(t *testing.T) { GrouplessAPIPrefixes: sets.NewString("api"), } - ts := httptest.NewServer(WithPanicRecovery(handler, nil, resolver)) + ts := httptest.NewServer(WithPanicRecovery(handler, resolver)) defer ts.Close() _, err = http.Get(ts.URL) @@ -294,7 +290,7 @@ func TestErrConnKilledHTTP2(t *testing.T) { } // test server - ts := httptest.NewUnstartedServer(WithPanicRecovery(handler, nil, resolver)) + ts := httptest.NewUnstartedServer(WithPanicRecovery(handler, resolver)) tsCert, err := tls.X509KeyPair(tsCrt, tsKey) if err != nil { t.Fatalf("backend: invalid x509/key pair: %v", err)