Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,13 @@ Controller-level and node-level deployments will both have priorityClassName set
As noted in [GCP PD documentation](https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/gce-pd-csi-driver), `ext4` and `xfs` are officially supported. `btrfs` support is experimental:
- As of writing, Ubuntu VM images support btrfs, but [COS does not](https://cloud.google.com/container-optimized-os/docs/concepts/supported-filesystems).

`btrfs` filesystem accepts two "special" mount options:
`btrfs` filesystem accepts the following "special" mount options and the sysfs paths they target:

- `btrfs-data-bg_reclaim_threshold`
- `btrfs-metadata-bg_reclaim_threshold`
- `btrfs-allocation-data-bg_reclaim_threshold`: `/sys/fs/btrfs/FS-UUID/allocation/data/bg_reclaim_threshold`.
- `btrfs-allocation-metadata-bg_reclaim_threshold`: `/sys/fs/btrfs/FS-UUID/allocation/metadata/bg_reclaim_threshold`.
- `btrfs-bdi-read_ahead_kb`: `/sys/fs/btrfs/FS-UUID/bdi/read_ahead_kb`.

Which writes to `/sys/fs/btrfs/FS-UUID/allocation/{,meta}data/bg_reclaim_threshold`, as documented [in btrfs docs](https://btrfs.readthedocs.io/en/latest/ch-sysfs.html#uuid-allocations-data-metadata-system).
See more in the [btrfs docs](https://btrfs.readthedocs.io/en/latest/ch-sysfs.html#uuid-allocations-data-metadata-system).

## Further Documentation

Expand Down
78 changes: 43 additions & 35 deletions pkg/gce-pd-csi-driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,14 @@ const (
readAheadKBMountFlagRegexPattern = "^read_ahead_kb=(.+)$"
btrfsReclaimDataRegexPattern = "^btrfs-allocation-data-bg_reclaim_threshold=(\\d{1,2})$" // 0-99 are valid, incl. 00
btrfsReclaimMetadataRegexPattern = "^btrfs-allocation-metadata-bg_reclaim_threshold=(\\d{1,2})$" // ditto ^
btrfsReadAheadKBRegexPattern = "^btrfs-bdi-read_ahead_kb=(\\d+)$"
)

var (
readAheadKBMountFlagRegex = regexp.MustCompile(readAheadKBMountFlagRegexPattern)
btrfsReclaimDataRegex = regexp.MustCompile(btrfsReclaimDataRegexPattern)
btrfsReclaimMetadataRegex = regexp.MustCompile(btrfsReclaimMetadataRegexPattern)
btrfsReadAheadKBRegex = regexp.MustCompile(btrfsReadAheadKBRegexPattern)
)

func getDefaultFsType() string {
Expand Down Expand Up @@ -402,7 +404,7 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
// Part 3: Mount device to stagingTargetPath
fstype := getDefaultFsType()

var btrfsReclaimData, btrfsReclaimMetadata string
var btrfsReclaimData, btrfsReclaimMetadata, btrfsReadAheadKb string
shouldUpdateReadAhead := false
var readAheadKB int64
options := []string{}
Expand All @@ -418,7 +420,7 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
}

if mnt.FsType == fsTypeBtrfs {
btrfsReclaimData, btrfsReclaimMetadata = extractBtrfsReclaimFlags(mnt.MountFlags)
btrfsReclaimData, btrfsReclaimMetadata, btrfsReadAheadKb = extractBtrfsFlags(mnt.MountFlags)
}
} else if blk := volumeCapability.GetBlock(); blk != nil {
// Noop for Block NodeStageVolume
Expand Down Expand Up @@ -465,47 +467,52 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
}
}

// Part 5: Update read_ahead
// Part 5: Update read_ahead for the block device
if shouldUpdateReadAhead {
if err := ns.updateReadAhead(devicePath, readAheadKB); err != nil {
return nil, status.Errorf(codes.Internal, "failure updating readahead for %s to %dKB: %v", devicePath, readAheadKB, err.Error())
}
}

// Part 6: if configured, write sysfs values
btrfsSysfs := map[string]string{}

if btrfsReadAheadKb != "" {
btrfsSysfs["bdi/read_ahead_kb"] = btrfsReadAheadKb
}

if !readonly {
sysfs := map[string]string{}
if btrfsReclaimData != "" {
sysfs["allocation/data/bg_reclaim_threshold"] = btrfsReclaimData
btrfsSysfs["allocation/data/bg_reclaim_threshold"] = btrfsReclaimData
}
if btrfsReclaimMetadata != "" {
sysfs["allocation/metadata/bg_reclaim_threshold"] = btrfsReclaimMetadata
}

if len(sysfs) > 0 {
args := []string{"--match-tag", "UUID", "--output", "value", stagingTargetPath}
cmd := ns.Mounter.Exec.Command("blkid", args...)
var stderr bytes.Buffer
cmd.SetStderr(&stderr)
klog.V(4).Infof(
"running %q for volume %s",
strings.Join(append([]string{"blkid"}, args...), " "),
volumeID,
)
uuid, err := cmd.Output()
if err != nil {
klog.Errorf("blkid failed for %s. stderr:\n%s", volumeID, stderr.String())
return nil, status.Errorf(codes.Internal, "blkid failed: %v", err)
}
uuid = bytes.TrimRight(uuid, "\n")
btrfsSysfs["allocation/metadata/bg_reclaim_threshold"] = btrfsReclaimMetadata
}
}

for key, value := range sysfs {
path := fmt.Sprintf("%s/fs/btrfs/%s/%s", ns.SysfsPath, uuid, key)
if err := writeSysfs(path, value); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
klog.V(4).Infof("NodeStageVolume set %s %s=%s", volumeID, key, value)
// Part 6: if configured, write sysfs values
if len(btrfsSysfs) > 0 {
args := []string{"--match-tag", "UUID", "--output", "value", stagingTargetPath}
cmd := ns.Mounter.Exec.Command("blkid", args...)
var stderr bytes.Buffer
cmd.SetStderr(&stderr)
klog.V(4).Infof(
"running %q for volume %s",
strings.Join(append([]string{"blkid"}, args...), " "),
volumeID,
)
uuid, err := cmd.Output()
if err != nil {
klog.Errorf("blkid failed for %s. stderr:\n%s", volumeID, stderr.String())
return nil, status.Errorf(codes.Internal, "blkid failed: %v", err)
}
uuid = bytes.TrimRight(uuid, "\n")

for key, value := range btrfsSysfs {
path := fmt.Sprintf("%s/fs/btrfs/%s/%s", ns.SysfsPath, uuid, key)
if err := writeSysfs(path, value); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
klog.V(4).Infof("NodeStageVolume set %s %s=%s", volumeID, key, value)
}
}

Expand All @@ -526,7 +533,6 @@ func writeSysfs(path, value string) (_err error) {
if _, err := f.Write([]byte(value)); err != nil {
return err
}

return nil
}

Expand All @@ -546,16 +552,18 @@ func (ns *GCENodeServer) updateReadAhead(devicePath string, readAheadKB int64) e
return nil
}

func extractBtrfsReclaimFlags(mountFlags []string) (string, string) {
var reclaimData, reclaimMetadata string
func extractBtrfsFlags(mountFlags []string) (string, string, string) {
var reclaimData, reclaimMetadata, readAheadKb string
for _, mountFlag := range mountFlags {
if got := btrfsReclaimDataRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
reclaimData = got[1]
} else if got := btrfsReclaimMetadataRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
reclaimMetadata = got[1]
} else if got := btrfsReadAheadKBRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
readAheadKb = got[1]
}
}
return reclaimData, reclaimMetadata
return reclaimData, reclaimMetadata, readAheadKb
}

func extractReadAheadKBMountFlag(mountFlags []string) (int64, bool, error) {
Expand Down
59 changes: 35 additions & 24 deletions pkg/gce-pd-csi-driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"context"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"testing"
Expand Down Expand Up @@ -625,17 +626,24 @@ func TestNodeStageVolume(t *testing.T) {
defer os.RemoveAll(tempDir)
stagingPath := filepath.Join(tempDir, defaultStagingPath)

btrfsUUID := "00000000-0000-0000-0000-000000000001"
btrfsPrefix := fmt.Sprintf("%s/sys/fs/btrfs/%s/allocation", tempDir, btrfsUUID)
var (
btrfsUUID = "00000000-0000-0000-0000-000000000001"
btrfsPrefix = fmt.Sprintf("%s/sys/fs/btrfs/%s", tempDir, btrfsUUID)
btrfsFixtures = map[string]string{
"allocation/data/bg_reclaim_threshold": "0\n",
"allocation/metadata/bg_reclaim_threshold": "0\n",
"bdi/read_ahead_kb": "4096\n",
}
)

for _, suffix := range []string{"data", "metadata"} {
dir := btrfsPrefix + "/" + suffix
for fname, contents := range btrfsFixtures {
fullPath := btrfsPrefix + "/" + fname
dir := path.Dir(fullPath)
if err := os.MkdirAll(dir, 0755); err != nil {
t.Fatalf("Failed to set up fake sysfs dir %q: %v", dir, err)
}
fname := dir + "/bg_reclaim_threshold"
if err := os.WriteFile(fname, []byte("0\n"), 0644); err != nil {
t.Fatalf("write %q: %v", fname, err)
if err := os.WriteFile(fullPath, []byte(contents), 0644); err != nil {
t.Fatalf("write %q: %v", fullPath, err)
}
}

Expand All @@ -653,6 +661,7 @@ func TestNodeStageVolume(t *testing.T) {
readAheadSectors string
btrfsReclaimData string
btrfsReclaimMetadata string
btrfsReadAheadKb string
sectorSizeInBytes int
expErrCode codes.Code
}{
Expand Down Expand Up @@ -907,7 +916,7 @@ func TestNodeStageVolume(t *testing.T) {
},
},
{
name: "Valid request, set btrfs-allocation-{,meta}data-bg_reclaim_threshold",
name: "Valid request, set btrfs props",
req: &csi.NodeStageVolumeRequest{
VolumeId: volumeID,
StagingTargetPath: stagingPath,
Expand All @@ -918,6 +927,7 @@ func TestNodeStageVolume(t *testing.T) {
MountFlags: []string{
"btrfs-allocation-data-bg_reclaim_threshold=90",
"btrfs-allocation-metadata-bg_reclaim_threshold=91",
"btrfs-bdi-read_ahead_kb=128",
},
},
},
Expand All @@ -931,6 +941,7 @@ func TestNodeStageVolume(t *testing.T) {
readonlyBit: "0",
btrfsReclaimData: "90",
btrfsReclaimMetadata: "91",
btrfsReadAheadKb: "128",
expCommandList: []fakeCmd{
{
cmd: "blkid",
Expand Down Expand Up @@ -1256,29 +1267,29 @@ func TestNodeStageVolume(t *testing.T) {
if tc.expReadAheadUpdate == false && readAheadUpdateCalled == true {
t.Fatalf("Test updated read ahead, but it was not expected.")
}
if tc.btrfsReclaimData == "" && tc.btrfsReclaimMetadata == "" && blkidCalled {
if tc.btrfsReclaimData == "" && tc.btrfsReclaimMetadata == "" && tc.btrfsReadAheadKb == "" && blkidCalled {
t.Fatalf("blkid was called, but was not expected.")
}

if tc.btrfsReclaimData != "" {
fname := btrfsPrefix + "/data/bg_reclaim_threshold"
got, err := os.ReadFile(fname)
if err != nil {
t.Fatalf("read %q: %v", fname, err)
}
if s := strings.TrimSpace(string(got)); s != tc.btrfsReclaimData {
t.Fatalf("%q: expected %q, got %q", fname, tc.btrfsReclaimData, s)
}
btrfsProps := map[string]string{
"/allocation/data/bg_reclaim_threshold": tc.btrfsReclaimData,
"/allocation/metadata/bg_reclaim_threshold": tc.btrfsReclaimMetadata,
"/bdi/read_ahead_kb": tc.btrfsReadAheadKb,
}
if tc.btrfsReclaimMetadata != "" {
fname := btrfsPrefix + "/metadata/bg_reclaim_threshold"
got, err := os.ReadFile(fname)

for fname, prop := range btrfsProps {
if prop == "" {
continue
}

got, err := os.ReadFile(btrfsPrefix + fname)
if err != nil {
t.Fatalf("read %q: %v", fname, err)
t.Fatalf("read %q: %v", btrfsPrefix+fname, err)
}
if s := strings.TrimSpace(string(got)); s != tc.btrfsReclaimMetadata {
t.Fatalf("%q: expected %q, got %q", fname, tc.btrfsReclaimMetadata, s)
if s := strings.TrimSpace(string(got)); s != prop {
t.Fatalf("%q: expected %q, got %q", btrfsPrefix+fname, prop, s)
}

}
})
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/gce-pd-csi-driver/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,20 @@ func collectMountOptions(fsType string, mntFlags []string) []string {
var options []string

for _, opt := range mntFlags {
// The flags below are special flags that aren't
// passed directly as options to the mount command.
if readAheadKBMountFlagRegex.FindString(opt) != "" {
// The read_ahead_kb flag is a special flag that isn't
// passed directly as an option to the mount command.
continue
}

if btrfsReclaimDataRegex.FindString(opt) != "" {
continue
}
if btrfsReclaimMetadataRegex.FindString(opt) != "" {
continue
}
if btrfsReadAheadKBRegex.FindString(opt) != "" {
continue
}

options = append(options, opt)
}
Expand Down