From 4aadc968c83747288b7b8a3079917499f1cd750b Mon Sep 17 00:00:00 2001 From: lifubang Date: Sun, 27 Oct 2024 00:14:00 +0800 Subject: [PATCH] libct: fix stdio permission error for userns container If the root in the container is different from current root user, we need to change the owner of stdio before we enter the user namespace, or else we may can't access stdio in the container. Please see https://github.com/opencontainers/runc/issues/4475 Signed-off-by: lifubang --- libcontainer/container_linux.go | 54 +++++++++++++++++++++++++++++++++ libcontainer/init_linux.go | 46 +--------------------------- 2 files changed, 55 insertions(+), 45 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 12ee6a3ef95..b1efa1ab5de 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -346,6 +346,18 @@ func (c *Container) start(process *Process) (retErr error) { if err := utils.CloseExecFrom(3); err != nil { return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err) } + + // If the root in the container is different from current root user, we + // need to change the owner of stdio before we enter the user namespace, + // or else we may can't access stdio in the container. + // Please see https://github.com/opencontainers/runc/issues/4475 + containerRootUid, err := c.config.HostRootUID() + if err != nil { + return fmt.Errorf("unable to get root uid of the container: %w", err) + } + if err := fixStdioPermissions(containerRootUid); err != nil { + return fmt.Errorf("unable to change permission of stdio: %w", err) + } if err := parent.start(); err != nil { return fmt.Errorf("unable to start container process: %w", err) } @@ -506,6 +518,48 @@ func isDmzBinarySafe(c *configs.Config) bool { return false } +// fixStdioPermissions fixes the permissions of STDIO to the specified user. +func fixStdioPermissions(uid int) error { + var null unix.Stat_t + if err := unix.Stat("/dev/null", &null); err != nil { + return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} + } + for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + var s unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &s); err != nil { + return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} + } + + // Skip chown if uid is already the one we want or any of the STDIO descriptors + // were redirected to /dev/null. + if int(s.Uid) == uid || s.Rdev == null.Rdev { + continue + } + + // We only change the uid (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := file.Chown(uid, int(s.Gid)); err != nil { + // If we've hit an EINVAL then s.Gid isn't mapped in the user + // namespace. If we've hit an EPERM then the inode's current owner + // is not mapped in our user namespace (in particular, + // privileged_wrt_inode_uidgid() has failed). Read-only + // /dev can result in EROFS error. In any case, it's + // better for us to just not touch the stdio rather + // than bail at this point. + + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) { + continue + } + return err + } + } + return nil +} + func (c *Container) newParentProcess(p *Process) (parentProcess, error) { comm, err := newProcessComm() if err != nil { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 176a4cdc12b..1e81c1729d5 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -516,7 +516,7 @@ func setupUser(config *initConfig) error { // Before we change to the container's user make sure that the processes // STDIO is correctly owned by the user that we are switching to. - if err := fixStdioPermissions(execUser); err != nil { + if err := fixStdioPermissions(execUser.Uid); err != nil { return err } @@ -563,50 +563,6 @@ func setupUser(config *initConfig) error { return nil } -// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. -// The ownership needs to match because it is created outside of the container and needs to be -// localized. -func fixStdioPermissions(u *user.ExecUser) error { - var null unix.Stat_t - if err := unix.Stat("/dev/null", &null); err != nil { - return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} - } - for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { - var s unix.Stat_t - if err := unix.Fstat(int(file.Fd()), &s); err != nil { - return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} - } - - // Skip chown if uid is already the one we want or any of the STDIO descriptors - // were redirected to /dev/null. - if int(s.Uid) == u.Uid || s.Rdev == null.Rdev { - continue - } - - // We only change the uid (as it is possible for the mount to - // prefer a different gid, and there's no reason for us to change it). - // The reason why we don't just leave the default uid=X mount setup is - // that users expect to be able to actually use their console. Without - // this code, you couldn't effectively run as a non-root user inside a - // container and also have a console set up. - if err := file.Chown(u.Uid, int(s.Gid)); err != nil { - // If we've hit an EINVAL then s.Gid isn't mapped in the user - // namespace. If we've hit an EPERM then the inode's current owner - // is not mapped in our user namespace (in particular, - // privileged_wrt_inode_uidgid() has failed). Read-only - // /dev can result in EROFS error. In any case, it's - // better for us to just not touch the stdio rather - // than bail at this point. - - if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) { - continue - } - return err - } - } - return nil -} - // setupNetwork sets up and initializes any network interface inside the container. func setupNetwork(config *initConfig) error { for _, config := range config.Networks {