Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/gce-pd-csi-driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ func handle() {
klog.Errorf("Failed to emit process start time: %v", err.Error())
}
mm.RegisterMountMetric()
mm.RegisterUnexpectedDevicePathChangesMetric()
}
metricsManager = &mm
}
Expand Down Expand Up @@ -282,7 +283,7 @@ func handle() {
klog.Fatalf("Failed to get node info from API server: %v", err.Error())
}

deviceCache, err := linkcache.NewDeviceCacheForNode(ctx, *diskCacheSyncPeriod, *nodeName, driverName, deviceUtils)
deviceCache, err := linkcache.NewDeviceCacheForNode(ctx, *diskCacheSyncPeriod, *nodeName, driverName, deviceUtils, metricsManager)
if err != nil {
klog.Warningf("Failed to create device cache: %v", err.Error())
} else {
Expand Down
21 changes: 13 additions & 8 deletions pkg/linkcache/devices_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,22 @@ import (
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/k8sclient"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/metrics"
)

const byIdDir = "/dev/disk/by-id"

// NewDeviceCacheForNode looks up the node named nodeName with
// k8sclient.GetNodeWithRetry and builds a DeviceCache for it via
// newDeviceCacheForNode. If the lookup fails it returns a nil cache and the
// error wrapped with the node name.
//
// This is a diff rendering: where a line appears twice, the first is the
// pre-change version and the second is the post-change version, which
// additionally threads metricsManager through to the cache.
func NewDeviceCacheForNode(ctx context.Context, period time.Duration, nodeName string, driverName string, deviceUtils deviceutils.DeviceUtils) (*DeviceCache, error) {
func NewDeviceCacheForNode(ctx context.Context, period time.Duration, nodeName string, driverName string, deviceUtils deviceutils.DeviceUtils, metricsManager *metrics.MetricsManager) (*DeviceCache, error) {
node, err := k8sclient.GetNodeWithRetry(ctx, nodeName)
if err != nil {
return nil, fmt.Errorf("failed to get node %s: %w", nodeName, err)
}

return newDeviceCacheForNode(period, node, driverName, deviceUtils), nil
return newDeviceCacheForNode(period, node, driverName, deviceUtils, metricsManager), nil
}

// NewTestDeviceCache builds a DeviceCache for the supplied node using the
// fixed driver name "pd.csi.storage.gke.io" and a fresh
// deviceutils.NewDeviceUtils(). The post-change call (second return line)
// passes a nil metrics manager.
func NewTestDeviceCache(period time.Duration, node *v1.Node) *DeviceCache {
return newDeviceCacheForNode(period, node, "pd.csi.storage.gke.io", deviceutils.NewDeviceUtils())
return newDeviceCacheForNode(period, node, "pd.csi.storage.gke.io", deviceutils.NewDeviceUtils(), nil)
}

func NewTestNodeWithVolumes(volumes []string) *v1.Node {
Expand All @@ -42,12 +43,13 @@ func NewTestNodeWithVolumes(volumes []string) *v1.Node {
}
}

func newDeviceCacheForNode(period time.Duration, node *v1.Node, driverName string, deviceUtils deviceutils.DeviceUtils) *DeviceCache {
func newDeviceCacheForNode(period time.Duration, node *v1.Node, driverName string, deviceUtils deviceutils.DeviceUtils, metricsManager *metrics.MetricsManager) *DeviceCache {
deviceCache := &DeviceCache{
symlinks: make(map[string]deviceMapping),
period: period,
deviceUtils: deviceUtils,
dir: byIdDir,
symlinks: make(map[string]deviceMapping),
period: period,
deviceUtils: deviceUtils,
dir: byIdDir,
metricsManager: metricsManager,
}

// Look at the status.volumesInUse field. For each, take the last section
Expand Down Expand Up @@ -163,6 +165,9 @@ func (d *DeviceCache) listAndUpdate() {
// Check if the realPath has changed
if realPath != device.realPath {
klog.Warningf("Change in device path for volume %s (symlink: %s), previous path: %s, new path: %s", device.volumeID, symlink, device.realPath, realPath)
if d.metricsManager != nil {
d.metricsManager.RecordUnexpectedDevicePathChangesMetric()
}

// Update the cache with the new realPath
device.realPath = realPath
Expand Down
3 changes: 2 additions & 1 deletion pkg/linkcache/devices_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ import (

"k8s.io/klog/v2"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/metrics"
)

// NewDeviceCacheForNode is the Windows variant: it only logs that the device
// cache is not implemented and returns a nil cache together with a nil error.
// None of the arguments are used. The second signature line is the
// post-change version, which adds the metricsManager parameter.
// NOTE(review): callers receive a nil *DeviceCache with a nil error — confirm
// they nil-check the cache before use.
func NewDeviceCacheForNode(ctx context.Context, period time.Duration, nodeName string, driverName string, deviceUtils deviceutils.DeviceUtils) (*DeviceCache, error) {
func NewDeviceCacheForNode(ctx context.Context, period time.Duration, nodeName string, driverName string, deviceUtils deviceutils.DeviceUtils, metricsManager *metrics.MetricsManager) (*DeviceCache, error) {
klog.Infof("NewDeviceCacheForNode is not implemented for Windows")
return nil, nil
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/linkcache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"time"

"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/metrics"
)

type deviceMapping struct {
Expand All @@ -17,6 +18,7 @@ type DeviceCache struct {
symlinks map[string]deviceMapping
period time.Duration
// dir is the directory to look for device symlinks
dir string
deviceUtils deviceutils.DeviceUtils
dir string
deviceUtils deviceutils.DeviceUtils
metricsManager *metrics.MetricsManager
}
18 changes: 18 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ var (
},
[]string{"driver_name", "file_system_format", "error_type"},
)

// unexpectedDevicePathChangesMetric is an ALPHA-stability counter vector in
// the "node" subsystem with a single "driver_name" label. It is registered by
// RegisterUnexpectedDevicePathChangesMetric and incremented by
// RecordUnexpectedDevicePathChangesMetric.
unexpectedDevicePathChangesMetric = metrics.NewCounterVec(&metrics.CounterOpts{
Subsystem: "node",
Name: "unexpected_device_path_changes",
Help: "Unexpected device path changes",
StabilityLevel: metrics.ALPHA,
},
[]string{"driver_name"},
)
)

type MetricsManager struct {
Expand Down Expand Up @@ -92,6 +101,10 @@ func (mm *MetricsManager) RegisterMountMetric() {
mm.registry.MustRegister(mountErrorMetric)
}

// RegisterUnexpectedDevicePathChangesMetric adds the unexpected-device-path-
// changes counter to the manager's registry via MustRegister.
func (mm *MetricsManager) RegisterUnexpectedDevicePathChangesMetric() {
	reg := mm.registry
	reg.MustRegister(unexpectedDevicePathChangesMetric)
}

func (mm *MetricsManager) recordComponentVersionMetric() error {
v := getEnvVar(envGKEPDCSIVersion)
if v == "" {
Expand Down Expand Up @@ -121,6 +134,11 @@ func (mm *MetricsManager) RecordMountErrorMetric(fs_format string, err error) {
klog.Infof("Recorded mount error type: %q", errType)
}

// RecordUnexpectedDevicePathChangesMetric increments the unexpected-device-
// path-changes counter for the PD CSI driver label and logs that it did so.
func (mm *MetricsManager) RecordUnexpectedDevicePathChangesMetric() {
	// Resolve the per-driver counter first, then bump it.
	driverCounter := unexpectedDevicePathChangesMetric.WithLabelValues(pdcsiDriverName)
	driverCounter.Inc()

	klog.Infof("Recorded unexpected device path change")
}

// EmmitProcessStartTime hands the registry's Register function to
// metrics.RegisterProcessStartTime and returns whatever error that call
// reports.
func (mm *MetricsManager) EmmitProcessStartTime() error {
	registerFn := mm.registry.Register
	return metrics.RegisterProcessStartTime(registerFn)
}
Expand Down