Skip to content

Commit f706e93

Browse files
committed
Add retries for temporary network issues
We pretty commonly see this issue when communicating with a registry: ``` Get https://gcr.io/v2/: net/http: TLS handshake timeout ``` This causes a huge amount of pain if it's a failure within e.g. a large CI job, since the entire thing would fail. Fixes google#77
1 parent abf9ef0 commit f706e93

File tree

10 files changed

+410
-21
lines changed

10 files changed

+410
-21
lines changed

pkg/internal/retry/options.go

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright 2019 Google LLC All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package retry
16+
17+
import (
18+
"time"
19+
20+
"k8s.io/apimachinery/pkg/util/wait"
21+
)
22+
23+
var (
24+
defaultBackoff = wait.Backoff{
25+
Duration: 100 * time.Millisecond,
26+
Steps: 2,
27+
}
28+
29+
defaultPredicate = IsNotNil
30+
)
31+
32+
// Predicate determines whether an error should be retried.
33+
type Predicate func(error) (retry bool)
34+
35+
// Option is a functional option for retry operations.
36+
type Option func(*options)
37+
38+
type options struct {
39+
backoff wait.Backoff
40+
predicate Predicate
41+
}
42+
43+
func makeOptions(opts ...Option) *options {
44+
o := &options{
45+
backoff: defaultBackoff,
46+
predicate: defaultPredicate,
47+
}
48+
for _, option := range opts {
49+
option(o)
50+
}
51+
return o
52+
}
53+
54+
// WithBackoff sets the backoff for retry operations.
55+
//
56+
// By default, just retry the operation once after waiting 100ms. This is not
57+
// perfect for most situations and should be overridden by using this option.
58+
func WithBackoff(backoff wait.Backoff) Option {
59+
return func(o *options) {
60+
o.backoff = backoff
61+
}
62+
}
63+
64+
// WithPredicate sets the predicate for retry operations.
65+
//
66+
// By default, just retry all errors. This is not perfect for most situations
67+
// and should be overridden by using this option.
68+
func WithPredicate(predicate Predicate) Option {
69+
return func(o *options) {
70+
o.predicate = predicate
71+
}
72+
}

pkg/internal/retry/retry.go

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright 2019 Google LLC All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package retry provides methods for retrying operations. It is a thin wrapper
16+
// around k8s.io/apimachinery/pkg/util/wait to make certain operations easier.
17+
package retry
18+
19+
import (
20+
"fmt"
21+
22+
"k8s.io/apimachinery/pkg/util/wait"
23+
)
24+
25+
// This is implemented by several errors in the net package as well as our
26+
// transport.Error.
27+
type temporary interface {
28+
Temporary() bool
29+
}
30+
31+
// IsTemporary reports whether err implements temporary and its Temporary() method returns true.
32+
func IsTemporary(err error) bool {
33+
if te, ok := err.(temporary); ok && te.Temporary() {
34+
return true
35+
}
36+
return false
37+
}
38+
39+
func IsNotNil(err error) bool {
40+
return err != nil
41+
}
42+
43+
// Retry calls f and retries it, using exponential backoff, for as long as the
44+
// predicate reports that the returned error should be retried. It returns the
45+
// last error returned by f once the predicate declines or the backoff steps run out.
46+
func Retry(f func() error, opts ...Option) (err error) {
47+
if f == nil {
48+
return fmt.Errorf("nil func passed to retry")
49+
}
50+
51+
o := makeOptions(opts...)
52+
53+
condition := func() (bool, error) {
54+
err = f()
55+
if o.predicate(err) {
56+
return false, nil
57+
}
58+
return true, err
59+
}
60+
61+
wait.ExponentialBackoff(o.backoff, condition)
62+
return
63+
}

pkg/internal/retry/retry_test.go

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright 2018 Google LLC All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package retry
16+
17+
import (
18+
"fmt"
19+
"testing"
20+
21+
"k8s.io/apimachinery/pkg/util/wait"
22+
)
23+
24+
type temp struct{}
25+
26+
func (e temp) Error() string {
27+
return "temporary error"
28+
}
29+
30+
func (e temp) Temporary() bool {
31+
return true
32+
}
33+
34+
func TestRetry(t *testing.T) {
35+
for i, test := range []struct {
36+
predicate Predicate
37+
err error
38+
shouldRetry bool
39+
}{{
40+
predicate: IsTemporary,
41+
err: nil,
42+
shouldRetry: false,
43+
}, {
44+
predicate: IsTemporary,
45+
err: fmt.Errorf("not temporary"),
46+
shouldRetry: false,
47+
}, {
48+
predicate: IsNotNil,
49+
err: fmt.Errorf("not temporary"),
50+
shouldRetry: true,
51+
}, {
52+
predicate: IsTemporary,
53+
err: temp{},
54+
shouldRetry: true,
55+
}} {
56+
// Make sure we retry 5 times if we shouldRetry.
57+
steps := 5
58+
backoff := wait.Backoff{
59+
Steps: steps,
60+
}
61+
62+
// Count how many times this function is invoked.
63+
count := 0
64+
f := func() error {
65+
count++
66+
return test.err
67+
}
68+
69+
Retry(f, WithBackoff(backoff), WithPredicate(test.predicate))
70+
71+
if test.shouldRetry && count != steps {
72+
t.Errorf("expected %d to retry %v, did not", i, test.err)
73+
} else if !test.shouldRetry && count == steps {
74+
t.Errorf("expected %d not to retry %v, but did", i, test.err)
75+
}
76+
}
77+
}
78+
79+
// Make sure we don't panic.
80+
func TestNilF(t *testing.T) {
81+
if err := Retry(nil); err == nil {
82+
t.Errorf("got nil when passing in nil f")
83+
}
84+
}

pkg/v1/google/list.go

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ func newLister(repo name.Repository, options ...ListerOption) (*lister, error) {
5151
}
5252
}
5353

54+
// Wrap the transport in something that can retry network flakes.
55+
l.transport = transport.NewRetry(l.transport)
56+
5457
scopes := []string{repo.Scope(transport.PullScope)}
5558
tr, err := transport.New(repo.Registry, l.auth, l.transport, scopes)
5659
if err != nil {

pkg/v1/remote/options.go

+4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/google/go-containerregistry/pkg/authn"
2222
"github.com/google/go-containerregistry/pkg/name"
2323
v1 "github.com/google/go-containerregistry/pkg/v1"
24+
"github.com/google/go-containerregistry/pkg/v1/remote/transport"
2425
)
2526

2627
// Option is a functional option for remote operations.
@@ -57,6 +58,9 @@ func makeOptions(reg name.Registry, opts ...Option) (*options, error) {
5758
o.auth = auth
5859
}
5960

61+
// Wrap the transport in something that can retry network flakes.
62+
o.transport = transport.NewRetry(o.transport)
63+
6064
return o, nil
6165
}
6266

pkg/v1/remote/transport/error.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ func (e *Error) Error() string {
4848
}
4949
}
5050

51-
// ShouldRetry returns whether the request that preceded the error should be retried.
52-
func (e *Error) ShouldRetry() bool {
51+
// Temporary returns whether the error is temporary, i.e. whether the request that preceded it should be retried.
52+
func (e *Error) Temporary() bool {
5353
if len(e.Errors) == 0 {
5454
return false
5555
}

pkg/v1/remote/transport/error_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
"github.com/google/go-cmp/cmp"
2525
)
2626

27-
func TestShouldRetry(t *testing.T) {
27+
func TestTemporary(t *testing.T) {
2828
tests := []struct {
2929
error *Error
3030
retry bool
@@ -50,10 +50,10 @@ func TestShouldRetry(t *testing.T) {
5050
}}
5151

5252
for _, test := range tests {
53-
retry := test.error.ShouldRetry()
53+
retry := test.error.Temporary()
5454

5555
if test.retry != retry {
56-
t.Errorf("ShouldRetry(%s) = %t, wanted %t", test.error, retry, test.retry)
56+
t.Errorf("Temporary(%s) = %t, wanted %t", test.error, retry, test.retry)
5757
}
5858
}
5959
}

pkg/v1/remote/transport/retry.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2018 Google LLC All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package transport
16+
17+
import (
18+
"net/http"
19+
"time"
20+
21+
"github.com/google/go-containerregistry/pkg/internal/retry"
22+
"k8s.io/apimachinery/pkg/util/wait"
23+
)
24+
25+
// Sleep for 0.1, 0.3, 0.9, 2.7 seconds. This should cover networking blips.
26+
var defaultBackoff = wait.Backoff{
27+
Duration: 100 * time.Millisecond,
28+
Factor: 3.0,
29+
Jitter: 0.1,
30+
Steps: 5,
31+
}
32+
33+
var _ http.RoundTripper = (*retryTransport)(nil)
34+
35+
// retryTransport wraps a RoundTripper and retries temporary network errors.
36+
type retryTransport struct {
37+
inner http.RoundTripper
38+
backoff wait.Backoff
39+
predicate retry.Predicate
40+
}
41+
42+
// Option is a functional option for retryTransport.
43+
type Option func(*options)
44+
45+
type options struct {
46+
backoff wait.Backoff
47+
predicate retry.Predicate
48+
}
49+
50+
// WithBackoff sets the backoff for retry operations.
51+
func WithBackoff(backoff wait.Backoff) Option {
52+
return func(o *options) {
53+
o.backoff = backoff
54+
}
55+
}
56+
57+
// WithPredicate sets the predicate for retry operations.
58+
func WithPredicate(predicate func(error) bool) Option {
59+
return func(o *options) {
60+
o.predicate = predicate
61+
}
62+
}
63+
64+
// NewRetry returns a transport that retries errors.
65+
func NewRetry(inner http.RoundTripper, opts ...Option) http.RoundTripper {
66+
o := &options{
67+
backoff: defaultBackoff,
68+
predicate: retry.IsTemporary,
69+
}
70+
71+
for _, opt := range opts {
72+
opt(o)
73+
}
74+
75+
return &retryTransport{
76+
inner: inner,
77+
backoff: o.backoff,
78+
predicate: o.predicate,
79+
}
80+
}
81+
82+
func (t *retryTransport) RoundTrip(in *http.Request) (out *http.Response, err error) {
83+
roundtrip := func() error {
84+
out, err = t.inner.RoundTrip(in)
85+
return err
86+
}
87+
retry.Retry(roundtrip, retry.WithBackoff(t.backoff), retry.WithPredicate(t.predicate))
88+
return
89+
}

0 commit comments

Comments
 (0)