Skip to content

Commit a9f98f3

Browse files
authored
Nvme resiliency read from sysfs instead of nvme commands
Signed-off-by: kj-netapp <[email protected]>
1 parent ffa2375 commit a9f98f3

File tree

10 files changed

+485
-74
lines changed

10 files changed

+485
-74
lines changed

frontend/csi/node_server.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -2517,9 +2517,10 @@ func (p *Plugin) nodeUnstageNVMeVolume(
25172517
disconnect := p.nvmeHandler.RemovePublishedNVMeSession(&publishedNVMeSessions, publishInfo.NVMeSubsystemNQN,
25182518
publishInfo.NVMeNamespaceUUID)
25192519

2520+
nvmeSubsys := p.nvmeHandler.NewNVMeSubsystem(ctx, publishInfo.NVMeSubsystemNQN)
25202521
// Get the device using 'nvme-cli' commands. Flush the device IOs.
25212522
// Proceed further with unstage flow, if device is not found.
2522-
nvmeDev, err := p.nvmeHandler.NewNVMeDevice(ctx, publishInfo.NVMeNamespaceUUID)
2523+
nvmeDev, err := nvmeSubsys.GetNVMeDevice(ctx, publishInfo.NVMeNamespaceUUID)
25232524
if err != nil && !errors.IsNotFoundError(err) {
25242525
return nil, fmt.Errorf("failed to get NVMe device; %v", err)
25252526
}
@@ -2586,7 +2587,6 @@ func (p *Plugin) nodeUnstageNVMeVolume(
25862587
}
25872588

25882589
// Get the number of namespaces associated with the subsystem
2589-
nvmeSubsys := p.nvmeHandler.NewNVMeSubsystem(ctx, publishInfo.NVMeSubsystemNQN)
25902590
numNs, err := nvmeSubsys.GetNamespaceCount(ctx)
25912591
if err != nil {
25922592
Logc(ctx).WithField(

mocks/mock_utils/mock_filesystem/mock_filesystem_client.go

+31
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utils/filesystem/filesystem.go

+31
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ type Filesystem interface {
6565
ctx context.Context, path string,
6666
) (available, capacity, usage, inodes, inodesFree, inodesUsed int64, err error)
6767
GetUnmountPath(ctx context.Context, trackingInfo *models.VolumeTrackingInfo) (string, error)
68+
ScanFile(filename string) ([]byte, error)
69+
ScanDir(path string) ([]os.FileInfo, error)
6870
}
6971

7072
type Mount interface {
@@ -340,3 +342,32 @@ func (f *FSClient) DeleteFile(ctx context.Context, filepath, fileDescription str
340342

341343
return filepath, nil
342344
}
345+
346+
func (f *FSClient) ScanFile(filename string) ([]byte, error) {
347+
fs := afero.NewOsFs()
348+
349+
file, err := fs.Open(filename)
350+
if err != nil {
351+
fmt.Println("Failed to open file:", err)
352+
return nil, err
353+
}
354+
defer file.Close()
355+
356+
data, err := afero.ReadAll(file)
357+
if err != nil {
358+
fmt.Println("Failed to read file:", err)
359+
return nil, err
360+
}
361+
return data, nil
362+
}
363+
364+
func (f *FSClient) ScanDir(path string) ([]os.FileInfo, error) {
365+
fs := afero.NewOsFs()
366+
367+
dirEntries, err := afero.ReadDir(fs, path)
368+
if err != nil {
369+
fmt.Println("Failed to read directory:", err)
370+
return nil, err
371+
}
372+
return dirEntries, nil
373+
}

utils/filesystem/filesystem_windows.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ func (f *FSClient) getFilesystemSize(ctx context.Context, _ string) (int64, erro
2121
}
2222

2323
func (f *FSClient) GetFilesystemStats(ctx context.Context, path string) (int64, int64, int64, int64, int64, int64,
24-
error) {
24+
error,
25+
) {
2526
Logc(ctx).Debug(">>>> filesystem_windows.GetFilesystemStats")
2627
defer Logc(ctx).Debug("<<<< filesystem_windows.GetFilesystemStats")
2728

utils/nvme.go

+29-61
Original file line numberDiff line numberDiff line change
@@ -14,47 +14,25 @@ import (
1414

1515
. "github.com/netapp/trident/logging"
1616
"github.com/netapp/trident/utils/devices/luks"
17-
"github.com/netapp/trident/utils/errors"
1817
"github.com/netapp/trident/utils/exec"
1918
"github.com/netapp/trident/utils/filesystem"
2019
"github.com/netapp/trident/utils/models"
2120
)
2221

2322
const NVMeAttachTimeout = 20 * time.Second
2423

25-
var fsClient = filesystem.New(mountClient)
26-
27-
// getNVMeSubsystem returns the NVMe subsystem details.
28-
func getNVMeSubsystem(ctx context.Context, subsysNqn string) (*NVMeSubsystem, error) {
29-
Logc(ctx).Debug(">>>> nvme.getNVMeSubsystem")
30-
defer Logc(ctx).Debug("<<<< nvme.getNVMeSubsystem")
31-
32-
subsys, err := GetNVMeSubsystemList(ctx)
33-
if err != nil {
34-
Logc(ctx).WithField("Error", err).Errorf("Failed to get subsystem list: %v", err)
35-
return nil, err
36-
}
37-
38-
// Getting current subsystem details.
39-
for _, sub := range subsys.Subsystems {
40-
if sub.NQN == subsysNqn {
41-
return &sub, nil
42-
}
43-
}
44-
45-
return nil, fmt.Errorf("couldn't find subsystem %s", subsysNqn)
46-
}
24+
var fsClient = *filesystem.New(mountClient)
4725

4826
// updatePaths updates the paths with the current state of the subsystem on the k8s node.
4927
func (s *NVMeSubsystem) updatePaths(ctx context.Context) error {
5028
// Getting current state of subsystem on the k8s node.
51-
sub, err := getNVMeSubsystem(ctx, s.NQN)
29+
paths, err := GetNVMeSubsystemPaths(ctx, fsClient, s.Name)
5230
if err != nil {
5331
Logc(ctx).WithField("Error", err).Errorf("Failed to update subsystem paths: %v", err)
5432
return fmt.Errorf("failed to update subsystem paths: %v", err)
5533
}
5634

57-
s.Paths = sub.Paths
35+
s.Paths = paths
5836

5937
return nil
6038
}
@@ -137,43 +115,38 @@ func (s *NVMeSubsystem) Disconnect(ctx context.Context) error {
137115

138116
// GetNamespaceCount returns the number of namespaces mapped to the subsystem.
139117
func (s *NVMeSubsystem) GetNamespaceCount(ctx context.Context) (int, error) {
140-
var combinedError error
141-
118+
credibility := false
142119
for _, path := range s.Paths {
143-
count, err := GetNamespaceCountForSubsDevice(ctx, "/dev/"+path.Name)
144-
if err != nil {
145-
Logc(ctx).WithField("Error", err).Warnf("Failed to get namespace count: %v", err)
146-
combinedError = multierr.Append(combinedError, err)
147-
continue
120+
if path.State == "live" {
121+
credibility = true
122+
break
148123
}
124+
}
149125

150-
return count, nil
126+
if !credibility {
127+
return 0, fmt.Errorf("nvme paths are down, couldn't get the number of namespaces")
128+
}
129+
130+
count, err := GetNVMeDeviceCountAt(ctx, s.FS, s.Name)
131+
if err != nil {
132+
Logc(ctx).Errorf("Failed to get namespace count: %v", err)
133+
return 0, err
151134
}
152135

153-
// Couldn't find any sessions, so no namespaces are attached to this subsystem.
154-
// But if there was error getting the number of namespaces from all the paths, return error.
155-
return 0, combinedError
136+
return count, nil
156137
}
157138

158-
// getNVMeDevice returns the NVMe device corresponding to nsPath namespace.
159-
func getNVMeDevice(ctx context.Context, nsUUID string) (*NVMeDevice, error) {
160-
Logc(ctx).Debug(">>>> nvme.getNVMeDevice")
161-
defer Logc(ctx).Debug("<<<< nvme.getNVMeDevice")
139+
func (s *NVMeSubsystem) GetNVMeDevice(ctx context.Context, nsUUID string) (NVMeDeviceInterface, error) {
140+
Logc(ctx).Debug(">>>> nvme.GetNVMeDevice")
141+
defer Logc(ctx).Debug("<<<< nvme.GetNVMeDevice")
162142

163-
dList, err := GetNVMeDeviceList(ctx)
143+
devInterface, err := GetNVMeDeviceAt(ctx, s.Name, nsUUID)
164144
if err != nil {
165-
return nil, fmt.Errorf("failed to get device: %v", err)
166-
}
167-
168-
for _, dev := range dList.Devices {
169-
if dev.UUID == nsUUID {
170-
Logc(ctx).Debugf("Device found: %v.", dev)
171-
return &dev, nil
172-
}
145+
Logc(ctx).Errorf("Failed to get NVMe device, %v", err)
146+
return nil, err
173147
}
174148

175-
Logc(ctx).WithField("nsUUID", nsUUID).Debug("No device found for this Namespace.")
176-
return nil, errors.NotFoundError("no device found for the given namespace %v", nsUUID)
149+
return devInterface, nil
177150
}
178151

179152
// GetPath returns the device path where we mount the filesystem in NodePublish.
@@ -203,7 +176,7 @@ func (d *NVMeDevice) FlushDevice(ctx context.Context, ignoreErrors, force bool)
203176

204177
// IsNil returns true if Device and NamespacePath are not set.
205178
func (d *NVMeDevice) IsNil() bool {
206-
if d == nil || (d.Device == "" && d.NamespacePath == "") {
179+
if d == nil || d.Device == "" {
207180
return true
208181
}
209182
return false
@@ -214,20 +187,15 @@ func NewNVMeHandler() NVMeInterface {
214187
return &NVMeHandler{}
215188
}
216189

217-
// NewNVMeDevice returns new NVMe device
218-
func (nh *NVMeHandler) NewNVMeDevice(ctx context.Context, nsUUID string) (NVMeDeviceInterface, error) {
219-
return getNVMeDevice(ctx, nsUUID)
220-
}
221-
222190
// NewNVMeSubsystem returns NVMe subsystem object. Even if a subsystem is not connected to the k8s node,
223191
// this function returns a minimal NVMe subsystem object.
224192
func (nh *NVMeHandler) NewNVMeSubsystem(ctx context.Context, subsNqn string) NVMeSubsystemInterface {
225-
sub, err := getNVMeSubsystem(ctx, subsNqn)
193+
sub, err := GetNVMeSubsystem(ctx, fsClient, subsNqn)
226194
if err != nil {
227195
Logc(ctx).WithField("Error", err).Warnf("Failed to get subsystem: %v; returning minimal subsystem", err)
228196
return &NVMeSubsystem{NQN: subsNqn}
229197
}
230-
return sub
198+
return &sub
231199
}
232200

233201
// GetHostNqn returns the NQN of the k8s node.
@@ -295,7 +263,7 @@ func AttachNVMeVolume(
295263
}
296264
}
297265

298-
nvmeDev, err := nvmeHandler.NewNVMeDevice(ctx, publishInfo.NVMeNamespaceUUID)
266+
nvmeDev, err := nvmeSubsys.GetNVMeDevice(ctx, publishInfo.NVMeNamespaceUUID)
299267
if err != nil {
300268
return err
301269
}
@@ -589,7 +557,7 @@ func (nh *NVMeHandler) PopulateCurrentNVMeSessions(ctx context.Context, currSess
589557
}
590558

591559
// Get the list of the subsystems currently present on the k8s node.
592-
subs, err := GetNVMeSubsystemList(ctx)
560+
subs, err := listSubsystemsFromSysFs(fsClient, ctx)
593561
if err != nil {
594562
Logc(ctx).WithField("Error", err).Errorf("Failed to get subsystem list: %v", err)
595563
return err

utils/nvme_darwin.go

+35-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
. "github.com/netapp/trident/logging"
99
"github.com/netapp/trident/utils/errors"
10+
"github.com/netapp/trident/utils/filesystem"
1011
)
1112

1213
// NVMeActiveOnHost checks if NVMe is active on host
@@ -24,10 +25,10 @@ func GetHostNqn(ctx context.Context) (string, error) {
2425
}
2526

2627
// GetNVMeSubsystemList returns the list of subsystems connected to the k8s node.
27-
func GetNVMeSubsystemList(ctx context.Context) (Subsystems, error) {
28-
Logc(ctx).Debug(">>>> nvme_darwin.GetNVMeSubsystemList")
29-
defer Logc(ctx).Debug("<<<< nvme_darwin.GetNVMeSubsystemList")
30-
return Subsystems{}, errors.UnsupportedError("GetNVMeSubsystemList is not supported for darwin")
28+
func listSubsystemsFromSysFs(fs filesystem.FSClient, ctx context.Context) (Subsystems, error) {
29+
Logc(ctx).Debug(">>>> nvme_darwin.listSubsystemsFromSysFs")
30+
defer Logc(ctx).Debug("<<<< nvme_darwin.listSubsystemsFromSysFs")
31+
return Subsystems{}, errors.UnsupportedError("listSubsystemsFromSysFs is not supported for darwin")
3132
}
3233

3334
// ConnectSubsystemToHost creates a path (or session) from the ONTAP subsystem to the k8s node using svmDataLIF.
@@ -44,6 +45,36 @@ func DisconnectSubsystemFromHost(ctx context.Context, subsysNqn string) error {
4445
return errors.UnsupportedError("DisconnectSubsystemFromHost is not supported for darwin")
4546
}
4647

48+
func GetNVMeSubsystem(ctx context.Context, fs filesystem.FSClient, nqn string) (NVMeSubsystem, error) {
49+
Logc(ctx).Debug(">>>> nvme_darwin.GetNVMeSubsystem")
50+
defer Logc(ctx).Debug("<<<< nvme_darwin.GetNVMeSubsystem")
51+
return NVMeSubsystem{}, errors.UnsupportedError("GetNVMeSubsystem is not supported for darwin")
52+
}
53+
54+
func GetNVMeSubsystemPaths(ctx context.Context, fs filesystem.FSClient, subsystemDirPath string) ([]Path, error) {
55+
Logc(ctx).Debug(">>>> nvme_darwin.GetNVMeSubsystemPaths")
56+
defer Logc(ctx).Debug("<<<< nvme_darwin.GetNVMeSubsystemPaths")
57+
return []Path{}, errors.UnsupportedError("GetNVMeSubsystemPaths is not supported for darwin")
58+
}
59+
60+
func InitializeNVMeSubsystemPath(ctx context.Context, path *Path) error {
61+
Logc(ctx).Debug(">>>> nvme_darwin.InitializeNVMeSubsystemPath")
62+
defer Logc(ctx).Debug("<<<< nvme_darwin.InitializeNVMeSubsystemPath")
63+
return errors.UnsupportedError("InitializeNVMeSubsystemPath is not supported for darwin")
64+
}
65+
66+
func GetNVMeDeviceCountAt(ctx context.Context, fs filesystem.FSClient, path string) (int, error) {
67+
Logc(ctx).Debug(">>>> nvme_darwin.GetNVMeDeviceCountAt")
68+
defer Logc(ctx).Debug("<<<< nvme_darwin.GetNVMeDeviceCountAt")
69+
return 0, errors.UnsupportedError("GetNVMeDeviceCountAt is not supported for darwin")
70+
}
71+
72+
func GetNVMeDeviceAt(ctx context.Context, path, nsUUID string) (NVMeDeviceInterface, error) {
73+
Logc(ctx).Debug(">>>> nvme_darwin.GetNVMeDeviceAt")
74+
defer Logc(ctx).Debug("<<<< nvme_darwin.GetNVMeDeviceAt")
75+
return nil, errors.UnsupportedError("GetNVMeDeviceAt is not supported for darwin")
76+
}
77+
4778
// GetNamespaceCountForSubsDevice returns the number of namespaces present in a given subsystem device.
4879
func GetNamespaceCountForSubsDevice(ctx context.Context, subsDevice string) (int, error) {
4980
Logc(ctx).Debug(">>>> nvme_darwin.GetNamespaceCount")

0 commit comments

Comments
 (0)