From f06d4e7ebebe397c0af85bf2a8c938ea88b0e080 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Mon, 3 Mar 2025 12:34:47 -0800 Subject: [PATCH] goferfs: Add S/R support for open FDs to deleted files. This support is only needed when the gofer mount in question is writable. By default, the rootfs has an overlayfs applied, so the gofer lower layer is not writabled. But if you are using --overlay2=none, then this change should allow you to save sandbox with open FDs to deleted files in rootfs. Updates #11425 PiperOrigin-RevId: 733021267 --- pkg/sentry/fsimpl/gofer/dentry_impl.go | 6 +- pkg/sentry/fsimpl/gofer/directfs_dentry.go | 31 ++--- pkg/sentry/fsimpl/gofer/filesystem.go | 2 +- pkg/sentry/fsimpl/gofer/gofer.go | 8 ++ pkg/sentry/fsimpl/gofer/lisafs_dentry.go | 13 ++- pkg/sentry/fsimpl/gofer/save_restore.go | 129 ++++++++++++++++++++- test/runner/main.go | 3 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/unlink.cc | 8 +- test/util/test_util.cc | 5 + test/util/test_util.h | 2 + 11 files changed, 179 insertions(+), 29 deletions(-) diff --git a/pkg/sentry/fsimpl/gofer/dentry_impl.go b/pkg/sentry/fsimpl/gofer/dentry_impl.go index 77fd56f699..b1cda86715 100644 --- a/pkg/sentry/fsimpl/gofer/dentry_impl.go +++ b/pkg/sentry/fsimpl/gofer/dentry_impl.go @@ -393,12 +393,12 @@ func (d *dentry) symlink(ctx context.Context, name, target string, creds *auth.C } // Precondition: !d.isSynthetic(). -func (d *dentry) openCreate(ctx context.Context, name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, handle, error) { +func (d *dentry) openCreate(ctx context.Context, name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID, createDentry bool) (*dentry, handle, error) { switch dt := d.impl.(type) { case *lisafsDentry: - return dt.openCreate(ctx, name, accessFlags, mode, uid, gid) + return dt.openCreate(ctx, name, accessFlags, mode, uid, gid, createDentry) case *directfsDentry: - return dt.openCreate(name, accessFlags, mode, uid, gid) + return dt.openCreate(name, accessFlags, mode, uid, gid, createDentry) default: panic("unknown dentry implementation") } diff --git a/pkg/sentry/fsimpl/gofer/directfs_dentry.go b/pkg/sentry/fsimpl/gofer/directfs_dentry.go index cf260d4416..b1f8901aeb 100644 --- a/pkg/sentry/fsimpl/gofer/directfs_dentry.go +++ b/pkg/sentry/fsimpl/gofer/directfs_dentry.go @@ -453,7 +453,7 @@ func (d *directfsDentry) getXattr(ctx context.Context, name string, size uint64) // getCreatedChild opens the newly created child, sets its uid/gid, constructs // a disconnected dentry and returns it. -func (d *directfsDentry) getCreatedChild(name string, uid auth.KUID, gid auth.KGID, isDir bool) (*dentry, error) { +func (d *directfsDentry) getCreatedChild(name string, uid auth.KUID, gid auth.KGID, isDir bool, createDentry bool) (*dentry, error) { unlinkFlags := 0 extraOpenFlags := 0 if isDir { @@ -481,12 +481,15 @@ func (d *directfsDentry) getCreatedChild(name string, uid auth.KUID, gid auth.KG return nil, err } - child, err := d.fs.newDirectfsDentry(childFD) - if err != nil { - // Ownership of childFD was passed to newDirectDentry(), so no need to - // clean that up. - deleteChild() - return nil, err + var child *dentry + if createDentry { + child, err = d.fs.newDirectfsDentry(childFD) + if err != nil { + // Ownership of childFD was passed to newDirectDentry(), so no need to + // clean that up. + deleteChild() + return nil, err + } } return child, nil } @@ -506,7 +509,7 @@ func (d *directfsDentry) mknod(ctx context.Context, name string, creds *auth.Cre if err := unix.Mknodat(d.controlFD, name, uint32(opts.Mode), 0); err != nil { return nil, err } - return d.getCreatedChild(name, creds.EffectiveKUID, creds.EffectiveKGID, false /* isDir */) + return d.getCreatedChild(name, creds.EffectiveKUID, creds.EffectiveKGID, false /* isDir */, true /* createDentry */) } // Precondition: opts.Endpoint != nil and is transport.HostBoundEndpoint type. @@ -531,7 +534,7 @@ func (d *directfsDentry) bindAt(ctx context.Context, name string, creds *auth.Cr return nil, err } // Socket already has the right UID/GID set, so use uid = gid = -1. - child, err := d.getCreatedChild(name, auth.NoID /* uid */, auth.NoID /* gid */, false /* isDir */) + child, err := d.getCreatedChild(name, auth.NoID /* uid */, auth.NoID /* gid */, false /* isDir */, true /* createDentry */) if err != nil { hbep.ResetBoundSocketFD(ctx) return nil, err @@ -559,31 +562,31 @@ func (d *directfsDentry) link(target *directfsDentry, name string) (*dentry, err // link. The original file already has the right owner. // TODO(gvisor.dev/issue/6739): Hard linked dentries should share the same // inode fields. - return d.getCreatedChild(name, auth.NoID /* uid */, auth.NoID /* gid */, false /* isDir */) + return d.getCreatedChild(name, auth.NoID /* uid */, auth.NoID /* gid */, false /* isDir */, true /* createDentry */) } func (d *directfsDentry) mkdir(name string, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, error) { if err := unix.Mkdirat(d.controlFD, name, uint32(mode)); err != nil { return nil, err } - return d.getCreatedChild(name, uid, gid, true /* isDir */) + return d.getCreatedChild(name, uid, gid, true /* isDir */, true /* createDentry */) } func (d *directfsDentry) symlink(name, target string, creds *auth.Credentials) (*dentry, error) { if err := unix.Symlinkat(target, d.controlFD, name); err != nil { return nil, err } - return d.getCreatedChild(name, creds.EffectiveKUID, creds.EffectiveKGID, false /* isDir */) + return d.getCreatedChild(name, creds.EffectiveKUID, creds.EffectiveKGID, false /* isDir */, true /* createDentry */) } -func (d *directfsDentry) openCreate(name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, handle, error) { +func (d *directfsDentry) openCreate(name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID, createDentry bool) (*dentry, handle, error) { createFlags := unix.O_CREAT | unix.O_EXCL | int(accessFlags) | hostOpenFlags childHandleFD, err := unix.Openat(d.controlFD, name, createFlags, uint32(mode&^linux.FileTypeMask)) if err != nil { return nil, noHandle, err } - child, err := d.getCreatedChild(name, uid, gid, false /* isDir */) + child, err := d.getCreatedChild(name, uid, gid, false /* isDir */, createDentry) if err != nil { _ = unix.Close(childHandleFD) return nil, noHandle, err diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 068ecd091c..f0a47acf8b 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1277,7 +1277,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving kgid = auth.KGID(d.gid.Load()) } - child, h, err := d.openCreate(ctx, name, opts.Flags&linux.O_ACCMODE, opts.Mode, creds.EffectiveKUID, kgid) + child, h, err := d.openCreate(ctx, name, opts.Flags&linux.O_ACCMODE, opts.Mode, creds.EffectiveKUID, kgid, true /* createDentry */) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 15cc818c7a..a10bfc0d6e 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -246,6 +246,10 @@ type filesystem struct { // savedDentryRW records open read/write handles during save/restore. savedDentryRW map[*dentry]savedDentryRW + // savedDeletedOpenDentries records deleted dentries that are saved during + // save/restore. These are still accessible via open application FDs. + savedDeletedOpenDentries map[*dentry]struct{} + // released is nonzero once filesystem.Release has been called. released atomicbitops.Int32 } @@ -978,6 +982,10 @@ type dentry struct { // tracks dirty segments in cache. dirty is protected by dataMu. dirty fsutil.DirtySet + // If this dentry represents a deleted regular file, deletedDataSR is used to + // store file data for save/restore. + deletedDataSR []byte + // pf implements memmap.File for mappings of hostFD. pf dentryPlatformFile diff --git a/pkg/sentry/fsimpl/gofer/lisafs_dentry.go b/pkg/sentry/fsimpl/gofer/lisafs_dentry.go index c1df8a7bdc..8061d2a616 100644 --- a/pkg/sentry/fsimpl/gofer/lisafs_dentry.go +++ b/pkg/sentry/fsimpl/gofer/lisafs_dentry.go @@ -448,7 +448,7 @@ func (d *lisafsDentry) symlink(ctx context.Context, name, target string, creds * return d.newChildDentry(ctx, &symlinkInode, name) } -func (d *lisafsDentry) openCreate(ctx context.Context, name string, flags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, handle, error) { +func (d *lisafsDentry) openCreate(ctx context.Context, name string, flags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID, createDentry bool) (*dentry, handle, error) { ino, openFD, hostFD, err := d.controlFD.OpenCreateAt(ctx, name, flags, mode, lisafs.UID(uid), lisafs.GID(gid)) if err != nil { return nil, noHandle, err @@ -458,10 +458,13 @@ func (d *lisafsDentry) openCreate(ctx context.Context, name string, flags uint32 fdLisa: d.fs.client.NewFD(openFD), fd: int32(hostFD), } - child, err := d.fs.newLisafsDentry(ctx, &ino) - if err != nil { - h.close(ctx) - return nil, noHandle, err + var child *dentry + if createDentry { + child, err = d.fs.newLisafsDentry(ctx, &ino) + if err != nil { + h.close(ctx) + return nil, noHandle, err + } } return child, h, nil } diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go index 3d1249f376..e6e69a84a0 100644 --- a/pkg/sentry/fsimpl/gofer/save_restore.go +++ b/pkg/sentry/fsimpl/gofer/save_restore.go @@ -21,12 +21,15 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -50,6 +53,7 @@ func (fs *filesystem) PrepareSave(ctx context.Context) error { fs.renameMu.Lock() fs.evictAllCachedDentriesLocked(ctx) fs.renameMu.Unlock() + fs.savedDentryRW = make(map[*dentry]savedDentryRW) // Buffer pipe data so that it's available for reading after restore. (This // is a legacy VFS1 feature.) @@ -62,6 +66,16 @@ func (fs *filesystem) PrepareSave(ctx context.Context) error { } } } + // Save file data for deleted regular files which are still accessible via + // open application FDs. + for sd := fs.syncableDentries.Front(); sd != nil; sd = sd.Next() { + if sd.d.vfsd.IsDead() { + if err := sd.d.prepareSaveDead(ctx); err != nil { + fs.syncMu.Unlock() + return err + } + } + } fs.syncMu.Unlock() // Flush local state to the remote filesystem. @@ -69,7 +83,6 @@ func (fs *filesystem) PrepareSave(ctx context.Context) error { return err } - fs.savedDentryRW = make(map[*dentry]savedDentryRW) return fs.root.prepareSaveRecursive(ctx) } @@ -98,6 +111,62 @@ func (fd *specialFileFD) savePipeData(ctx context.Context) error { return nil } +func (d *dentry) prepareSaveDead(ctx context.Context) error { + if !d.isRegularFile() { + return fmt.Errorf("gofer.dentry(%q).prepareSaveDead: only regular deleted dentries can be saved, got %s", genericDebugPathname(d.fs, d), linux.FileMode(d.mode.Load())) + } + if !d.isDeleted() { + return fmt.Errorf("gofer.dentry(%q).prepareSaveDead: invalidated dentries can't be saved", genericDebugPathname(d.fs, d)) + } + if !d.cachedMetadataAuthoritative() { + if err := d.updateMetadata(ctx); err != nil { + return err + } + } + if d.isReadHandleOk() || d.isWriteHandleOk() { + d.fs.savedDentryRW[d] = savedDentryRW{ + read: d.isReadHandleOk(), + write: d.isWriteHandleOk(), + } + } + d.handleMu.RLock() + defer d.handleMu.RUnlock() + var h handle + if d.isReadHandleOk() { + h = d.readHandle() + } else { + var err error + h, err = d.openHandle(ctx, true /* read */, false /* write */, false /* trunc */) + if err != nil { + return fmt.Errorf("failed to open read handle for deleted file %q: %w", genericDebugPathname(d.fs, d), err) + } + defer h.close(ctx) + } + d.dataMu.RLock() + defer d.dataMu.RUnlock() + d.deletedDataSR = make([]byte, d.size.Load()) + done := uint64(0) + for done < uint64(len(d.deletedDataSR)) { + n, err := h.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(d.deletedDataSR[done:])), done) + done += n + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("failed to read deleted file %q: %w", genericDebugPathname(d.fs, d), err) + } + } + if done < uint64(len(d.deletedDataSR)) { + return fmt.Errorf("failed to read all of deleted file %q: read %d bytes, expected %d", genericDebugPathname(d.fs, d), done, len(d.deletedDataSR)) + } + d.deletedDataSR = d.deletedDataSR[:done] + if d.fs.savedDeletedOpenDentries == nil { + d.fs.savedDeletedOpenDentries = make(map[*dentry]struct{}) + } + d.fs.savedDeletedOpenDentries[d] = struct{}{} + return nil +} + func (d *dentry) prepareSaveRecursive(ctx context.Context) error { if d.isRegularFile() && !d.cachedMetadataAuthoritative() { // Get updated metadata for d in case we need to perform metadata @@ -131,13 +200,17 @@ func (d *dentry) prepareSaveRecursive(ctx context.Context) error { // beforeSave is invoked by stateify. func (d *dentry) beforeSave() { - if d.vfsd.IsDead() { - panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deleted and invalidated dentries can't be restored", genericDebugPathname(d.fs, d))) + if d.vfsd.IsDead() && d.deletedDataSR == nil { + panic(fmt.Sprintf("gofer.dentry(%q).beforeSave: deletedDataSR is nil for dead dentry (deleted=%t, synthetic=%t)", genericDebugPathname(d.fs, d), d.isDeleted(), d.isSynthetic())) } } // BeforeResume implements vfs.FilesystemImplSaveRestoreExtension.BeforeResume. func (fs *filesystem) BeforeResume(ctx context.Context) { + for d := range fs.savedDeletedOpenDentries { + d.deletedDataSR = nil + } + fs.savedDeletedOpenDentries = nil fs.savedDentryRW = nil } @@ -236,7 +309,15 @@ func (fs *filesystem) CompleteRestore(ctx context.Context, opts vfs.CompleteRest } } + // Restore deleted files which are still accessible via open application FDs. + for d := range fs.savedDeletedOpenDentries { + if err := d.restoreDead(ctx, &opts); err != nil { + return err + } + } + // Discard state only required during restore. + fs.savedDeletedOpenDentries = nil fs.savedDentryRW = nil return nil @@ -263,6 +344,48 @@ func (d *dentry) restoreDescendantsRecursive(ctx context.Context, opts *vfs.Comp return nil } +// restoreDead restores a deleted regular file. +// +// Preconditions: d.deletedDataSR != nil. +func (d *dentry) restoreDead(ctx context.Context, opts *vfs.CompleteRestoreOptions) error { + // Recreate the file on the host filesystem (this is temporary). + parent := d.parent.Load() + _, h, err := parent.openCreate(ctx, d.name, linux.O_WRONLY, linux.FileMode(d.mode.Load()), auth.KUID(d.uid.Load()), auth.KGID(d.gid.Load()), false /* createDentry */) + if err != nil { + return fmt.Errorf("failed to re-create deleted file %q: %w", genericDebugPathname(d.fs, d), err) + } + defer h.close(ctx) + // In case of errors, clean up the recreated file. + unlinkCU := cleanup.Make(func() { + if err := parent.unlink(ctx, d.name, 0 /* flags */); err != nil { + log.Warningf("failed to clean up recreated deleted file %q: %v", genericDebugPathname(d.fs, d), err) + } + }) + defer unlinkCU.Clean() + // Write the file data to the recreated file. + n, err := h.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(d.deletedDataSR)), 0) + if err != nil { + return fmt.Errorf("failed to write deleted file %q: %w", genericDebugPathname(d.fs, d), err) + } + if n != uint64(len(d.deletedDataSR)) { + return fmt.Errorf("failed to write all of deleted file %q: wrote %d bytes, expected %d", genericDebugPathname(d.fs, d), n, len(d.deletedDataSR)) + } + d.deletedDataSR = nil + // Restore the file. Note that timestamps may not match since we re-created + // the file on the host. + recreateOpts := *opts + recreateOpts.ValidateFileModificationTimestamps = false + if err := d.restoreFile(ctx, &recreateOpts); err != nil { + return err + } + // Finally, unlink the recreated file. + unlinkCU.Release() + if err := parent.unlink(ctx, d.name, 0 /* flags */); err != nil { + return fmt.Errorf("failed to clean up recreated deleted file %q: %v", genericDebugPathname(d.fs, d), err) + } + return nil +} + func (fd *specialFileFD) completeRestore(ctx context.Context) error { d := fd.dentry() h, err := d.openHandle(ctx, fd.vfsfd.IsReadable(), fd.vfsfd.IsWritable(), false /* trunc */) diff --git a/test/runner/main.go b/test/runner/main.go index 613cdde7d1..0715c0b52a 100644 --- a/test/runner/main.go +++ b/test/runner/main.go @@ -892,11 +892,12 @@ func runTestCaseRunsc(testBin string, tc *gtest.TestCase, args []string, t *test const ( platformVar = "TEST_ON_GVISOR" networkVar = "GVISOR_NETWORK" + runtimeVar = "GVISOR_RUNTIME" ioUringVar = "IOURING_ENABLED" fuseVar = "GVISOR_FUSE_TEST" saveVar = "GVISOR_SAVE_TEST" ) - env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) + env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network, runtimeVar+"=runsc") if *platformSupport != "" { env = append(env, fmt.Sprintf("%s=%s", platformSupportEnvVar, *platformSupport)) } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index e27bef0f6a..29bda2386b 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -4290,6 +4290,7 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:save_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/unlink.cc b/test/syscalls/linux/unlink.cc index 41d7873e61..c130aa88ac 100644 --- a/test/syscalls/linux/unlink.cc +++ b/test/syscalls/linux/unlink.cc @@ -21,6 +21,7 @@ #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" +#include "test/util/save_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -162,8 +163,11 @@ TEST(UnlinkTest, AtFile) { } TEST(UnlinkTest, OpenFile) { - // We can't save unlinked file unless they are on tmpfs. - const DisableSave ds; + // TODO(b/400287667): Enable save/restore for local gofer. + DisableSave ds; + if (IsRunningOnRunsc()) { + ds.reset(); + } auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); int fd; EXPECT_THAT(fd = open(file.path().c_str(), O_RDWR, 0666), SyscallSucceeds()); diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 877912e1e1..a234289505 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -54,6 +54,11 @@ const std::string GvisorPlatform() { return std::string(env); } +bool IsRunningOnRunsc() { + const char* env = getenv(kGvisorRuntime); + return env && strcmp(env, "runsc") == 0; +} + bool IsRunningWithHostinet() { const char* env = getenv(kGvisorNetwork); return env && strcmp(env, "host") == 0; diff --git a/test/util/test_util.h b/test/util/test_util.h index 1ace573018..adb84d31e3 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -204,6 +204,7 @@ namespace gvisor { namespace testing { constexpr char kTestOnGvisor[] = "TEST_ON_GVISOR"; +constexpr char kGvisorRuntime[] = "GVISOR_RUNTIME"; // TestInit must be called prior to RUN_ALL_TESTS. // @@ -231,6 +232,7 @@ constexpr char kSystrap[] = "systrap"; } // namespace Platform bool IsRunningOnGvisor(); +bool IsRunningOnRunsc(); const std::string GvisorPlatform(); bool IsRunningWithHostinet(); bool IsIOUringEnabled();