Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 7 additions & 37 deletions cmd/livenessprobe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ import (
"net"
"net/http"
"os"
"sync"
"time"

"google.golang.org/grpc"
"k8s.io/klog/v2"

"k8s.io/component-base/featuregate"
Expand Down Expand Up @@ -62,7 +60,7 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
ctx, cancel := context.WithTimeout(req.Context(), *probeTimeout)
defer cancel()

conn, err := acquireConnection(ctx, h.metricsManager)
conn, err := connlib.Connect(*csiAddress, h.metricsManager, connlib.WithTimeout(*probeTimeout))
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
Expand Down Expand Up @@ -92,37 +90,6 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
klog.V(5).InfoS("Health check succeeded")
}

// acquireConnection wraps the connlib.Connect but adding support to context
// cancelation.
func acquireConnection(ctx context.Context, metricsManager metrics.CSIMetricsManager) (conn *grpc.ClientConn, err error) {

var m sync.Mutex
var canceled bool
ready := make(chan bool)
go func() {
conn, err = connlib.Connect(*csiAddress, metricsManager)

m.Lock()
defer m.Unlock()
if err != nil && canceled && conn != nil {
conn.Close()
}

close(ready)
}()

select {
case <-ctx.Done():
m.Lock()
defer m.Unlock()
canceled = true
return nil, ctx.Err()

case <-ready:
return conn, err
}
}

func main() {
fg := featuregate.NewFeatureGate()
logsapi.AddFeatureGates(fg)
Expand Down Expand Up @@ -151,10 +118,13 @@ func main() {
}

metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
csiConn, err := acquireConnection(context.Background(), metricsManager)
csiConn, err := connlib.Connect(*csiAddress, metricsManager, connlib.WithTimeout(5*time.Minute))
if err != nil {
// connlib should retry forever so a returned error should mean
// the grpc client is misconfigured rather than an error on the network
// It is not possible to configure connlib to never timeout while acquiring a connection.
// If we allow it to timeout too quickly, livenessprobe may enter a crash loop and it may
// become impossible for a liveness probe on the CSI plugin to ever succeed. Set a five
// minute timeout (the maximum exponential backoff time) so livenessprobe at least waits
// long enough for a crashing CSI plugin to restart.
klog.ErrorS(err, "Failed to establish connection to CSI driver")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/golang/mock v1.6.0
github.com/kubernetes-csi/csi-lib-utils v0.16.0
github.com/kubernetes-csi/csi-test/v5 v5.2.0
google.golang.org/grpc v1.60.1
k8s.io/component-base v0.29.0
k8s.io/klog/v2 v2.110.1
)
Expand Down Expand Up @@ -46,6 +45,7 @@ require (
golang.org/x/sys v0.14.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/grpc v1.60.1 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
Expand Down