Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cache/refs.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/containerd/snapshots"
"github.com/docker/docker/pkg/idtools"
"github.com/hashicorp/go-multierror"
Expand All @@ -27,6 +28,7 @@ import (
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/leaseutil"
"github.com/moby/buildkit/util/progress"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/moby/buildkit/util/winlayers"
"github.com/moby/sys/mountinfo"
digest "github.com/opencontainers/go-digest"
Expand Down Expand Up @@ -1640,6 +1642,12 @@ func (sm *sharableMountable) Mount() (_ []mount.Mount, _ func() error, retErr er
os.Remove(dir)
}
}()
if userns.RunningInUserNS() {
mounts, err = rootlessmountopts.FixUp(mounts)
if err != nil {
return nil, nil, err
}
}
if err := mount.All(mounts, dir); err != nil {
return nil, nil, err
}
Expand Down
11 changes: 11 additions & 0 deletions docs/rootless.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ spec:

See also the [example manifests](#Kubernetes).

### Bottlerocket OS

On Bottlerocket OS, you need to run `sysctl -w user.max_user_namespaces=N` (where N is a positive integer, e.g. 63359) on the host nodes.

See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).

<details>
<summary>Old distributions</summary>

Expand Down Expand Up @@ -104,6 +110,11 @@ See https://rootlesscontaine.rs/getting-started/common/subuid/
### Error `Options:[rbind ro]}]: operation not permitted`
Make sure to mount an `emptyDir` volume on `/home/user/.local/share/buildkit`.

### Error `fork/exec /proc/self/exe: no space left on device` with `level=warning msg="/proc/sys/user/max_user_namespaces needs to be set to non-zero."`
Run `sysctl -w user.max_user_namespaces=N` (N=positive integer, like 63359) on the host nodes.

See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).

## Containerized deployment

### Kubernetes
Expand Down
26 changes: 26 additions & 0 deletions examples/kubernetes/sysctl-userns.privileged.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Run `sysctl -w user.max_user_namespaces=63359` on all the nodes,
# for errors like "/proc/sys/user/max_user_namespaces needs to be set to non-zero"
# on running rootless buildkitd pods.
#
# This workaround is known to be needed on Bottlerocket OS.
apiVersion: apps/v1
# A DaemonSet is used so that the sysctl command below is run by a pod on the nodes.
kind: DaemonSet
metadata:
labels:
app: sysctl-userns
name: sysctl-userns
spec:
selector:
matchLabels:
app: sysctl-userns
template:
metadata:
labels:
app: sysctl-userns
spec:
containers:
- name: sysctl-userns
image: busybox
# Apply the sysctl once, then sleep forever so the container keeps running
# instead of exiting after the one-shot command completes.
command: ["sh", "-euxc", "sysctl -w user.max_user_namespaces=63359 && sleep infinity"]
securityContext:
# NOTE(review): presumably privileged mode is what allows the container to
# change the node-level sysctl; confirm before relaxing this.
privileged: true
10 changes: 10 additions & 0 deletions executor/oci/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ import (
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/idtools"
"github.com/mitchellh/hashstructure/v2"
"github.com/moby/buildkit/executor"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/util/network"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
traceexec "github.com/moby/buildkit/util/tracing/exec"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
Expand Down Expand Up @@ -192,6 +194,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou
}

s.Mounts = dedupMounts(s.Mounts)

if userns.RunningInUserNS() {
s.Mounts, err = rootlessmountopts.FixUpOCI(s.Mounts)
if err != nil {
return nil, nil, err
}
}

return s, releaseAll, nil
}

Expand Down
10 changes: 10 additions & 0 deletions snapshot/localmounter_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"syscall"

"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/pkg/errors"
)

Expand All @@ -24,6 +26,14 @@ func (lm *localMounter) Mount() (string, error) {
lm.release = release
}

if userns.RunningInUserNS() {
var err error
lm.mounts, err = rootlessmountopts.FixUp(lm.mounts)
if err != nil {
return "", err
}
}

if len(lm.mounts) == 1 && (lm.mounts[0].Type == "bind" || lm.mounts[0].Type == "rbind") {
ro := false
for _, opt := range lm.mounts[0].Options {
Expand Down
88 changes: 88 additions & 0 deletions util/rootless/mountopts/mountopts_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package mountopts

import (
"github.com/containerd/containerd/mount"
"github.com/moby/buildkit/util/strutil"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
//
// The flags are returned as mount option strings in a fixed order
// (ro, nodev, noexec, nosuid, noatime, relatime, nodiratime), so repeated
// calls against the same mount yield identical option lists. The result is
// nil when none of the flags are set. An error from statfs(2) on path is
// returned unwrapped.
//
// From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460
func UnprivilegedMountFlags(path string) ([]string, error) {
	var statfs unix.Statfs_t
	if err := unix.Statfs(path, &statfs); err != nil {
		return nil, err
	}

	// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
	// An ordered table is used instead of a map: Go randomizes map iteration
	// order, which would make the order of the returned options differ from
	// call to call.
	unprivilegedFlags := []struct {
		mask uint64
		name string
	}{
		{unix.MS_RDONLY, "ro"},
		{unix.MS_NODEV, "nodev"},
		{unix.MS_NOEXEC, "noexec"},
		{unix.MS_NOSUID, "nosuid"},
		{unix.MS_NOATIME, "noatime"},
		{unix.MS_RELATIME, "relatime"},
		{unix.MS_NODIRATIME, "nodiratime"},
	}

	var flags []string
	for _, f := range unprivilegedFlags {
		if uint64(statfs.Flags)&f.mask == f.mask {
			flags = append(flags, f.name)
		}
	}

	return flags, nil
}

// FixUp adds the flags reported by UnprivilegedMountFlags to every bind mount
// in mounts. See https://github.com/moby/buildkit/issues/3098
//
// A mount is treated as a bind mount when its options contain "bind" or
// "rbind"; all other mounts are left untouched. For a bind mount, the flags
// of the mount containing its Source are appended to its options and the
// resulting list is de-duplicated. The slice is updated in place and also
// returned. If the flags cannot be determined for any mount, nil and a
// wrapped error are returned.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
	for idx := range mounts {
		cur := mounts[idx]

		bind := false
		for _, opt := range cur.Options {
			if opt == "bind" || opt == "rbind" {
				bind = true
				break
			}
		}

		if bind {
			extra, err := UnprivilegedMountFlags(cur.Source)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", cur)
			}
			cur.Options = strutil.DedupeSlice(append(cur.Options, extra...))
			mounts[idx] = cur
		}
	}
	return mounts, nil
}

// FixUpOCI adds the flags reported by UnprivilegedMountFlags to every bind
// mount in a list of OCI runtime-spec mounts.
//
// A mount is treated as a bind mount when its options contain "bind" or
// "rbind"; all other mounts are left untouched. For a bind mount, the flags
// of the mount containing its Source are appended to its options and the
// resulting list is de-duplicated. The slice is updated in place and also
// returned. If the flags cannot be determined for any mount, nil and a
// wrapped error are returned.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
	for idx := range mounts {
		cur := mounts[idx]

		bind := false
		for _, opt := range cur.Options {
			if opt == "bind" || opt == "rbind" {
				bind = true
				break
			}
		}

		if bind {
			extra, err := UnprivilegedMountFlags(cur.Source)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", cur)
			}
			cur.Options = strutil.DedupeSlice(append(cur.Options, extra...))
			mounts[idx] = cur
		}
	}
	return mounts, nil
}
21 changes: 21 additions & 0 deletions util/rootless/mountopts/mountopts_others.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//go:build !linux
// +build !linux

package mountopts

import (
"github.com/containerd/containerd/mount"
specs "github.com/opencontainers/runtime-spec/specs-go"
)

// UnprivilegedMountFlags is the stub for non-Linux builds. It does not inspect
// path and always returns an empty (non-nil) slice and a nil error.
func UnprivilegedMountFlags(path string) ([]string, error) {
return []string{}, nil
}

// FixUp is the stub for non-Linux builds. It returns mounts unchanged and a
// nil error.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
return mounts, nil
}

// FixUpOCI is the stub for non-Linux builds. It returns mounts unchanged and a
// nil error.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
return mounts, nil
}
30 changes: 30 additions & 0 deletions util/strutil/strutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Copyright The containerd Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package strutil

// DedupeSlice returns the elements of in with duplicates removed. The first
// occurrence of each string is kept and the relative order of the kept
// elements is preserved. The result is nil when in has no elements.
//
// Taken from https://github.com/containerd/nerdctl/blob/v1.2.1/pkg/strutil/strutil.go#L72-L82
func DedupeSlice(in []string) []string {
	var uniq []string
	seen := map[string]struct{}{}
	for i := range in {
		v := in[i]
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		uniq = append(uniq, v)
	}
	return uniq
}