Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 8 additions & 36 deletions cmd/livenessprobe/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@ import (
"net"
"net/http"
"os"
"sync"
"time"

"google.golang.org/grpc"
"k8s.io/klog/v2"

"k8s.io/component-base/featuregate"
Expand Down Expand Up @@ -62,7 +60,7 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
ctx, cancel := context.WithTimeout(req.Context(), *probeTimeout)
defer cancel()

conn, err := acquireConnection(ctx, h.metricsManager)
conn, err := connlib.Connect(*csiAddress, h.metricsManager, connlib.WithTimeout(*probeTimeout))
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
Expand Down Expand Up @@ -92,37 +90,6 @@ func (h *healthProbe) checkProbe(w http.ResponseWriter, req *http.Request) {
klog.V(5).InfoS("Health check succeeded")
}

// acquireConnection wraps the connlib.Connect but adding support to context
// cancelation.
func acquireConnection(ctx context.Context, metricsManager metrics.CSIMetricsManager) (conn *grpc.ClientConn, err error) {

var m sync.Mutex
var canceled bool
ready := make(chan bool)
go func() {
conn, err = connlib.Connect(*csiAddress, metricsManager)

m.Lock()
defer m.Unlock()
if err != nil && canceled && conn != nil {
conn.Close()
}

close(ready)
}()

select {
case <-ctx.Done():
m.Lock()
defer m.Unlock()
canceled = true
return nil, ctx.Err()

case <-ready:
return conn, err
}
}

func main() {
fg := featuregate.NewFeatureGate()
logsapi.AddFeatureGates(fg)
Expand Down Expand Up @@ -151,10 +118,14 @@ func main() {
}

metricsManager := metrics.NewCSIMetricsManager("" /* driverName */)
csiConn, err := acquireConnection(context.Background(), metricsManager)
// Connect to the CSI driver without any timeout to avoid crashing the probe when the driver is not ready yet.
// Goal: liveness probe never crashes, it only fails the probe when the driver is not available (yet).
// Since a http server for the probe is not running at this point, Kubernetes liveness probe will fail immediately
// with "connection refused", which is good enough to fail the probe.
csiConn, err := connlib.Connect(*csiAddress, metricsManager, connlib.WithTimeout(0))
if err != nil {
// connlib should retry forever so a returned error should mean
// the grpc client is misconfigured rather than an error on the network
// the grpc client is misconfigured rather than an error on the network or CSI driver.
klog.ErrorS(err, "Failed to establish connection to CSI driver")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
Expand All @@ -163,6 +134,7 @@ func main() {
csiDriverName, err := rpc.GetDriverName(context.Background(), csiConn)
csiConn.Close()
if err != nil {
// The CSI driver does not support GetDriverName, which is serious enough to crash the probe.
klog.ErrorS(err, "Failed to get CSI driver name")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/golang/mock v1.6.0
github.com/kubernetes-csi/csi-lib-utils v0.17.0
github.com/kubernetes-csi/csi-test/v5 v5.2.0
google.golang.org/grpc v1.60.1
k8s.io/component-base v0.29.0
k8s.io/klog/v2 v2.110.1
)
Expand Down Expand Up @@ -46,6 +45,7 @@ require (
golang.org/x/sys v0.14.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/grpc v1.60.1 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
Expand Down