Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 29 additions & 11 deletions agent/rpc/client_grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"encoding/json"
"errors"
"strings"
"sync"
"time"

"github.com/cenkalti/backoff/v5"
Expand Down Expand Up @@ -47,32 +48,49 @@ const (
// Maximum amount of time between sending consecutive batched log messages.
// Controls the delay between the CI job generating a log record, and web users receiving it.
maxLogFlushPeriod time.Duration = time.Second

// ConnectionRetryTimeout is the maximum time to wait for a connection to be restored
// before the agent gives up and exits.
ConnectionRetryTimeout = 2 * time.Minute
)

// client is the gRPC-backed value that NewGrpcClient returns as an rpc.Peer.
type client struct {
client proto.WoodpeckerClient
conn *grpc.ClientConn
logs chan *proto.LogEntry
connectionLostAt time.Time
// client is created from conn via proto.NewWoodpeckerClient in NewGrpcClient.
client proto.WoodpeckerClient
// conn is the underlying gRPC connection; IsConnected inspects its connectivity state.
conn *grpc.ClientConn
// logs is a buffered channel of log entries (capacity 10, set in NewGrpcClient).
logs chan *proto.LogEntry
// connectionLostAt is set by IsConnected the first time the connection is seen as neither Ready nor Idle.
// The zero value means no connection loss is currently recorded.
connectionLostAt time.Time
// connectionLostLock is held by IsConnected while it reads and updates connectionLostAt.
connectionLostLock sync.Mutex
// connectionRetryTimeout is the maximum time to wait for a connection to be restored before the agent gives up and exits.
// A value of 0 disables the timeout: shouldGiveUp then always returns false.
connectionRetryTimeout time.Duration
}

// NewGrpcClient returns a new grpc Client.
//
// It builds a Woodpecker gRPC client on top of conn, allocates a log channel
// buffered to 10 entries, applies opts in the order given, and then starts
// processLogs in its own goroutine with ctx.
func NewGrpcClient(ctx context.Context, conn *grpc.ClientConn) rpc.Peer {
func NewGrpcClient(ctx context.Context, conn *grpc.ClientConn, opts ...ClientOption) rpc.Peer {
client := new(client)
client.client = proto.NewWoodpeckerClient(conn)
client.conn = conn
client.logs = make(chan *proto.LogEntry, 10) // max memory use: 10 lines * 1 MiB

// Apply the options before the log goroutine is started.
for _, opt := range opts {
opt(client)
}

go client.processLogs(ctx)
return client
}

// ClientOption configures a client. NewGrpcClient applies each option to the
// newly created client before returning it.
type ClientOption func(c *client)

// SetConnectionRetryTimeout returns a ClientOption that sets how long the
// client waits for a lost connection to be restored before giving up.
//
// A duration of 0 means "retry forever". Negative durations are not
// meaningful: left as-is they would make shouldGiveUp report true as soon as a
// connection loss is recorded, so they are treated like 0 and a warning is
// logged.
func SetConnectionRetryTimeout(d time.Duration) ClientOption {
	if d < 0 {
		log.Warn().Msg("negative connection retry timeout is invalid, treating it as infinite")
		d = 0
	}
	if d == 0 {
		log.Warn().Msg("connection retry timeout set to infinite")
	}
	return func(c *client) {
		c.connectionRetryTimeout = d
	}
}

func (c *client) IsConnected() bool {
state := c.conn.GetState()
connected := state == connectivity.Ready || state == connectivity.Idle
c.connectionLostLock.Lock()
defer c.connectionLostLock.Unlock()
if !connected && c.connectionLostAt.IsZero() {
c.connectionLostAt = time.Now()
} else if connected && !c.connectionLostAt.IsZero() {
Expand All @@ -82,10 +100,10 @@ func (c *client) IsConnected() bool {
}

func (c *client) shouldGiveUp() bool {
if c.connectionLostAt.IsZero() {
if c.connectionRetryTimeout == 0 || c.connectionLostAt.IsZero() {
return false
}
return time.Since(c.connectionLostAt) > ConnectionRetryTimeout
return time.Since(c.connectionLostAt) > c.connectionRetryTimeout
}

func (c *client) newBackOff() backoff.BackOff {
Expand Down
4 changes: 3 additions & 1 deletion cmd/agent/core/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error {
}
defer conn.Close()

client := agent_rpc.NewGrpcClient(ctx, conn)
client := agent_rpc.NewGrpcClient(ctx, conn,
agent_rpc.SetConnectionRetryTimeout(c.Duration("retry-timeout")),
)
agentConfigPersisted := atomic.Bool{}

grpcCtx := metadata.NewOutgoingContext(grpcClientCtx, metadata.Pairs("hostname", hostname))
Expand Down
6 changes: 6 additions & 0 deletions cmd/agent/core/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ var flags = []cli.Flag{
Usage: "should the grpc server certificate be verified, only valid when WOODPECKER_GRPC_SECURE is true",
Value: true,
},
&cli.DurationFlag{
Sources: cli.EnvVars("WOODPECKER_RETRY_TIMEOUT"),
Name: "retry-timeout",
Usage: "how long the agent keeps retrying to reconnect to the server after the gRPC connection is lost before giving up, set to 0 to retry forever",
Value: 2 * time.Minute,
},
&cli.StringFlag{
Sources: cli.EnvVars("WOODPECKER_HOSTNAME"),
Name: "hostname",
Expand Down
13 changes: 13 additions & 0 deletions docs/docs/30-administration/10-configuration/30-agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,19 @@ Configures if the gRPC server certificate should be verified, only valid when `W

---

### RETRY_TIMEOUT

- Name: `WOODPECKER_RETRY_TIMEOUT`
- Default: `2m`

Set how long the agent keeps retrying to reconnect to the server after the gRPC connection is lost before giving up.

:::warning
If set to `0`, the agent retries forever and never gives up on a lost connection.
:::

---

### BACKEND

- Name: `WOODPECKER_BACKEND`
Expand Down