From e0f5d465a60c87201435cfb3e7561cad9d40400a Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Tue, 9 Aug 2016 17:22:26 +0800
Subject: [PATCH 1/3] Godeps: specs-go: update specs for Intel RDT/CAT

NOTE: this patch is only for the purpose of compiling runc. It is not
necessary if the dependent runtime-spec patch is merged.

Signed-off-by: Xiaochen Shen
---
 .../opencontainers/runtime-spec/specs-go/config.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-go/config.go b/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-go/config.go
index 491b734c937..148ab6fc921 100644
--- a/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-go/config.go
+++ b/Godeps/_workspace/src/github.com/opencontainers/runtime-spec/specs-go/config.go
@@ -311,6 +311,13 @@ type Network struct {
 	Priorities []InterfacePriority `json:"priorities,omitempty"`
 }
 
+// IntelRdt for Linux Intel RDT/CAT resource management (Linux 4.10)
+type IntelRdt struct {
+	// The schema for L3 cache id and capacity bitmask (CBM)
+	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	L3CacheSchema *string `json:"l3CacheSchema,omitempty"`
+}
+
 // Resources has container runtime resource constraints
 type Resources struct {
 	// Devices configures the device whitelist.
@@ -331,6 +338,8 @@ type Resources struct {
 	HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"`
 	// Network restriction configuration
 	Network *Network `json:"network,omitempty"`
+	// IntelRdt restriction configuration
+	IntelRdt *IntelRdt `json:"intelRdt,omitempty"`
 }
 
 // Device represents the mknod information for a Linux special device file

From e6adcba9b716138715e9fb3848dd7994151fa1a2 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Wed, 10 Aug 2016 01:11:46 +0800
Subject: [PATCH 2/3] libcontainer/SPEC.md: add documentation for Intel RDT/CAT

Signed-off-by: Xiaochen Shen
---
 libcontainer/SPEC.md | 87 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md
index e5894c6429d..265073c16e9 100644
--- a/libcontainer/SPEC.md
+++ b/libcontainer/SPEC.md
@@ -154,6 +154,93 @@ that no processes or threads escape the cgroups. This sync is
done via a pipe ( specified in the runtime section below ) that the container's
init process will block waiting for the parent to finish setup.

**intelRdt**:
Recent Intel Xeon platforms support Intel Resource Director Technology (RDT).
Cache Allocation Technology (CAT) is a sub-feature of RDT that currently
supports L3 cache resource allocation.

This feature provides a way for software to restrict cache allocation to a
defined 'subset' of the L3 cache, which may overlap with other 'subsets'.
The different subsets are identified by class of service (CLOS), and each
CLOS has a capacity bitmask (CBM).

It can be used to manage L3 cache resource allocation for containers if the
hardware and kernel support Intel RDT/CAT.

`intelRdt` is implemented as the `intel_rdt` cgroup subsystem in libcontainer
even though the Linux kernel interface is not a real cgroup. Once `intelRdt`
is joined, statistics can be collected from the `intel_rdt` cgroup subsystem.

In the Linux kernel, it is exposed via the "resource control" filesystem,
which is a "cgroup-like" interface.

Compared with cgroups, it has a similar process management lifecycle and
interfaces in a container. But unlike the cgroups hierarchy, it has a
single-level filesystem layout.
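Before the filesystem layout below, a quick illustration of how a capacity bitmask is formed. This is a sketch that is not part of the patch, and the `contiguousCBM` helper is purely illustrative; it derives the `0xffff0` mask used in the runc example later in this series:

```go
package main

import "fmt"

// contiguousCBM builds the hex string for a capacity bitmask covering `ways`
// contiguous cache ways starting at bit `offset`. CBMs must be contiguous
// runs of set bits, so a mask is fully described by these two numbers.
func contiguousCBM(ways, offset uint) string {
	mask := ((uint64(1) << ways) - 1) << offset
	return fmt.Sprintf("%x", mask)
}

func main() {
	// With a 20-bit root CBM (0xfffff), giving a group 16 ways and leaving
	// the low 4 ways to everyone else yields "ffff0", i.e. 4/5 of the cache.
	fmt.Println("L3:0=" + contiguousCBM(16, 4)) // prints: L3:0=ffff0
}
```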
Intel RDT "resource control" filesystem hierarchy:
```
mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
|   |-- L3
|       |-- cbm_mask
|       |-- num_closids
|-- cpus
|-- schemata
|-- tasks
|-- <container_id>
    |-- cpus
    |-- schemata
    |-- tasks
```

For runc, we can make use of the `tasks` and `schemata` configuration for L3
cache resource constraints.

The file `tasks` has a list of tasks that belong to this group (e.g., the
"<container_id>" group). Tasks can be added to a group by writing the task ID
to the "tasks" file (which will automatically remove them from the previous
group to which they belonged). New tasks created by fork(2) and clone(2) are
added to the same group as their parent. If a pid is not in any sub-group, it
is in the root group.

The file `schemata` has allocation masks/values for L3 cache on each socket,
which contains the L3 cache id and capacity bitmask (CBM).
```
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
```
For example, on a two-socket machine, the L3 schema line could be
`L3:0=ff;1=c0`, which means L3 cache id 0's CBM is 0xff and L3 cache id 1's
CBM is 0xc0.

A valid L3 cache CBM is a *contiguous* set of bits, and the number of bits
that can be set is bounded by the maximum CBM length, which varies among
supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem
layout, the CBM in a group should be a subset of the CBM in root. The kernel
checks validity on write. For example, 0xfffff in root indicates that the
maximum CBM length is 20 bits, which maps to the entire L3 cache capacity.
Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.

For more information about the Intel RDT/CAT kernel interface, see:
https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/cache&id=f20e57892806ad244eaec7a7ae365e78fee53377

An example for runc:
```
There are two L3 caches in the two-socket machine, the default CBM is 0xfffff
and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache
id 0 and the whole L3 cache id 1 to the container:

"linux": {
	"resources": {
		"intelRdt": {
			"l3CacheSchema": "L3:0=ffff0;1=fffff"
		}
	}
}
```

### Security

The standard set of Linux capabilities that are set in a container

From 12877b2480119b5939e00022b70215b8d01d99b2 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Thu, 17 Nov 2016 00:59:56 +0800
Subject: [PATCH 3/3] libcontainer: add support for Intel RDT/CAT in runc

About the Intel RDT/CAT feature:

Recent Intel Xeon platforms support Intel Resource Director Technology (RDT).
Cache Allocation Technology (CAT) is a sub-feature of RDT, which currently
supports L3 cache resource allocation.

This feature provides a way for software to restrict cache allocation to a
defined 'subset' of the L3 cache, which may overlap with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each
CLOS has a capacity bitmask (CBM).

More information about Intel RDT/CAT can be found in section 17.17 of the
Intel Software Developer's Manual.

About the Intel RDT/CAT kernel interface:

In the Linux kernel, the interface is defined and exposed via the "resource
control" filesystem, which is a "cgroup-like" interface.

Compared with cgroups, it has a similar process management lifecycle and
interfaces in a container. But unlike the cgroups hierarchy, it has a
single-level filesystem layout.
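To make the `tasks` semantics described above concrete, here is a minimal sketch, not part of the patch, that moves the calling process into a group. The `mygroup` name is hypothetical; runc itself uses the container id, as this patch shows:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
)

func main() {
	// Assumes the resctrl filesystem is already mounted at /sys/fs/resctrl.
	group := "/sys/fs/resctrl/mygroup"
	if err := os.MkdirAll(group, 0755); err != nil {
		panic(err)
	}
	// Writing a task ID to "tasks" moves that task into this group and out
	// of whichever group it belonged to before.
	pid := strconv.Itoa(os.Getpid())
	if err := ioutil.WriteFile(filepath.Join(group, "tasks"), []byte(pid), 0700); err != nil {
		panic(err)
	}
	fmt.Println("moved task", pid, "into", group)
}
```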
Intel RDT "resource control" filesystem hierarchy:

mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
|   |-- L3
|       |-- cbm_mask
|       |-- num_closids
|-- cpus
|-- schemata
|-- tasks
|-- <container_id>
    |-- cpus
    |-- schemata
    |-- tasks

For runc, we can make use of the `tasks` and `schemata` configuration for L3
cache resource constraints.

The file `tasks` has a list of tasks that belong to this group (e.g., the
"<container_id>" group). Tasks can be added to a group by writing the task ID
to the "tasks" file (which will automatically remove them from the previous
group to which they belonged). New tasks created by fork(2) and clone(2) are
added to the same group as their parent. If a pid is not in any sub-group, it
is in the root group.

The file `schemata` has allocation bitmasks/values for L3 cache on each
socket, which contains the L3 cache id and capacity bitmask (CBM).

Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."

For example, on a two-socket machine, the L3 schema line could be
`L3:0=ff;1=c0`, which means L3 cache id 0's CBM is 0xff and L3 cache id 1's
CBM is 0xc0.

A valid L3 cache CBM is a *contiguous* set of bits, and the number of bits
that can be set is bounded by the maximum CBM length, which varies among
supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem
layout, the CBM in a group should be a subset of the CBM in root. The kernel
checks validity on write. For example, 0xfffff in root indicates that the
maximum CBM length is 20 bits, which maps to the entire L3 cache capacity.
Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.

For more information about the Intel RDT/CAT kernel interface, see:
https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/cache&id=f20e57892806ad244eaec7a7ae365e78fee53377

An example for runc:

There are two L3 caches in the two-socket machine, the default CBM is 0xfffff
and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache
id 0 and the whole L3 cache id 1 to the container:

"linux": {
	"resources": {
		"intelRdt": {
			"l3CacheSchema": "L3:0=ffff0;1=fffff"
		}
	}
}

Signed-off-by: Xiaochen Shen
---
 events.go | 21 +-
 libcontainer/cgroups/cgroups.go | 3 +
 libcontainer/cgroups/fs/apply_raw.go | 102 +++--
 libcontainer/cgroups/fs/apply_raw_test.go | 16 +-
 libcontainer/cgroups/fs/intelrdt.go | 395 ++++++++++++++++++
 libcontainer/cgroups/fs/intelrdt_test.go | 70 ++++
 libcontainer/cgroups/stats.go | 16 +-
 .../cgroups/systemd/apply_nosystemd.go | 6 +-
 libcontainer/cgroups/systemd/apply_systemd.go | 90 +++-
 libcontainer/configs/cgroup_unix.go | 4 +
 libcontainer/container_linux.go | 5 +
 libcontainer/container_linux_test.go | 28 +-
 libcontainer/factory_linux.go | 20 +-
 libcontainer/process_linux.go | 11 +
 libcontainer/specconv/spec_linux.go | 5 +
 15 files changed, 726 insertions(+), 66 deletions(-)
 create mode 100644 libcontainer/cgroups/fs/intelrdt.go
 create mode 100644 libcontainer/cgroups/fs/intelrdt_test.go

diff --git a/events.go b/events.go
index 77cf5f540b1..c85064fa490 100644
--- a/events.go
+++ b/events.go
@@ -24,11 +24,12 @@ type event struct {
 
 // stats is the runc specific stats structure for stability when encoding and decoding stats.
type stats struct { - Cpu cpu `json:"cpu"` - Memory memory `json:"memory"` - Pids pids `json:"pids"` - Blkio blkio `json:"blkio"` - Hugetlb map[string]hugetlb `json:"hugetlb"` + Cpu cpu `json:"cpu"` + Memory memory `json:"memory"` + Pids pids `json:"pids"` + Blkio blkio `json:"blkio"` + Hugetlb map[string]hugetlb `json:"hugetlb"` + IntelRdt intelRdt `json:"intelRdt"` } type hugetlb struct { @@ -95,6 +96,12 @@ type memory struct { Raw map[string]uint64 `json:"raw,omitempty"` } +type intelRdt struct { + // The read-only default "schemas" in root, for reference + L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"` + L3CacheSchema string `json:"l3CacheSchema,omitempty"` +} + var eventsCommand = cli.Command{ Name: "events", Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics", @@ -223,6 +230,10 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats { for k, v := range cg.HugetlbStats { s.Hugetlb[k] = convertHugtlb(v) } + + is := cg.IntelRdtStats + s.IntelRdt.L3CacheSchemaRoot = is.IntelRdtRootStats.L3CacheSchema + s.IntelRdt.L3CacheSchema = is.IntelRdtGroupStats.L3CacheSchema return &s } diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go index 35fc8eb961d..9335734aad8 100644 --- a/libcontainer/cgroups/cgroups.go +++ b/libcontainer/cgroups/cgroups.go @@ -39,6 +39,9 @@ type Manager interface { // Sets the cgroup as configured. Set(container *configs.Config) error + + // Get non-cgroup resource path + GetResourcePath() string } type NotFoundError struct { diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index 30b20632b54..19f62d0f3c5 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -31,6 +31,7 @@ var ( &PerfEventGroup{}, &FreezerGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, + // If Intel RDT is enabled, will append IntelRdtGroup later } HugePageSizes, _ = cgroups.GetHugePageSize() ) @@ -62,9 +63,11 @@ type subsystem interface { } type Manager struct { - mu sync.Mutex - Cgroups *configs.Cgroup - Paths map[string]string + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string + ContainerId string + ResourcePath string } // The absolute path to the root of the cgroup hierarchies. 
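With the `events.go` changes earlier in this patch, the per-container stats gain an `intelRdt` object. A small sketch of decoding that fragment; the field names come from the struct tags above, while the sample values are invented:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// intelRdt mirrors the stats fields added to events.go above.
type intelRdt struct {
	L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"`
	L3CacheSchema     string `json:"l3CacheSchema,omitempty"`
}

func main() {
	// Hypothetical fragment of the stats emitted for a container.
	raw := []byte(`{"intelRdt":{"l3CacheSchemaRoot":"L3:0=fffff;1=fffff","l3CacheSchema":"L3:0=ffff0;1=fffff"}}`)

	var s struct {
		IntelRdt intelRdt `json:"intelRdt"`
	}
	if err := json.Unmarshal(raw, &s); err != nil {
		panic(err)
	}
	fmt.Println("root schema:     ", s.IntelRdt.L3CacheSchemaRoot)
	fmt.Println("container schema:", s.IntelRdt.L3CacheSchema)
}
```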
@@ -94,10 +97,11 @@ func getCgroupRoot() (string, error) { } type cgroupData struct { - root string - innerPath string - config *configs.Cgroup - pid int + root string + innerPath string + config *configs.Cgroup + pid int + containerId string } func (m *Manager) Apply(pid int) (err error) { @@ -109,7 +113,7 @@ func (m *Manager) Apply(pid int) (err error) { var c = m.Cgroups - d, err := getCgroupData(m.Cgroups, pid) + d, err := getCgroupData(m.Cgroups, pid, m.ContainerId) if err != nil { return err } @@ -131,23 +135,38 @@ func (m *Manager) Apply(pid int) (err error) { } paths := make(map[string]string) + + // If Intel RDT is enabled, append IntelRdtGroup to subsystems + if IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" { + subsystems = append(subsystems, &IntelRdtGroup{}) + intelRdtPath, err := GetIntelRdtPath(m.ContainerId) + if err != nil { + return err + } + m.ResourcePath = intelRdtPath + } + for _, sys := range subsystems { if err := sys.Apply(d); err != nil { return err } - // TODO: Apply should, ideally, be reentrant or be broken up into a separate - // create and join phase so that the cgroup hierarchy for a container can be - // created then join consists of writing the process pids to cgroup.procs - p, err := d.path(sys.Name()) - if err != nil { - // The non-presence of the devices subsystem is - // considered fatal for security reasons. - if cgroups.IsNotFound(err) && sys.Name() != "devices" { - continue + + // Intel RDT "resource control" filesystem is not in cgroup path + if sys.Name() != "intel_rdt" { + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is + // considered fatal for security reasons. 
+ if cgroups.IsNotFound(err) && sys.Name() != "devices" { + continue + } + return err } - return err + paths[sys.Name()] = p } - paths[sys.Name()] = p } m.Paths = paths return nil @@ -163,6 +182,12 @@ func (m *Manager) Destroy() error { return err } m.Paths = make(map[string]string) + + // Intel RDT "resource control" filesystem + if m.ResourcePath != "" { + return os.RemoveAll(m.ResourcePath) + } + m.ResourcePath = "" return nil } @@ -173,6 +198,13 @@ func (m *Manager) GetPaths() map[string]string { return paths } +func (m *Manager) GetResourcePath() string { + m.mu.Lock() + path := m.ResourcePath + m.mu.Unlock() + return path +} + func (m *Manager) GetStats() (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() @@ -186,6 +218,24 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, err } } + + // Intel RDT "resource control" filesystem stats + if IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" { + intelRdtPath, err := GetIntelRdtPath(m.ContainerId) + if err != nil || !cgroups.PathExists(intelRdtPath) { + return nil, err + } + sys, err := subsystems.Get("intel_rdt") + if err == errSubsystemDoesNotExist { + // In case IntelRdtGroup is not appended to subsystems + subsystems = append(subsystems, &IntelRdtGroup{}) + } + sys, _ = subsystems.Get("intel_rdt") + if err := sys.GetStats(intelRdtPath, stats); err != nil { + return nil, err + } + } + return stats, nil } @@ -199,6 +249,9 @@ func (m *Manager) Set(container *configs.Config) error { paths := m.GetPaths() for _, sys := range subsystems { path := paths[sys.Name()] + if sys.Name() == "intel_rdt" { + path = m.GetResourcePath() + } if err := sys.Set(path, container.Cgroups); err != nil { return err } @@ -241,7 +294,7 @@ func (m *Manager) GetAllPids() ([]int, error) { return cgroups.GetAllPids(paths["devices"]) } -func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { +func getCgroupData(c *configs.Cgroup, pid int, containerId string) (*cgroupData, error) { root, err := getCgroupRoot() if err != nil { return nil, err @@ -262,10 +315,11 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { } return &cgroupData{ - root: root, - innerPath: innerPath, - config: c, - pid: pid, + root: root, + innerPath: innerPath, + config: c, + pid: pid, + containerId: containerId, }, nil } diff --git a/libcontainer/cgroups/fs/apply_raw_test.go b/libcontainer/cgroups/fs/apply_raw_test.go index ba4e9e543c4..83cff3ba6c7 100644 --- a/libcontainer/cgroups/fs/apply_raw_test.go +++ b/libcontainer/cgroups/fs/apply_raw_test.go @@ -20,7 +20,7 @@ func TestInvalidCgroupPath(t *testing.T) { Path: "../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -51,7 +51,7 @@ func TestInvalidAbsoluteCgroupPath(t *testing.T) { Path: "/../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -84,7 +84,7 @@ func TestInvalidCgroupParent(t *testing.T) { Name: "name", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -117,7 +117,7 @@ func TestInvalidAbsoluteCgroupParent(t *testing.T) { Name: "name", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", 
err) } @@ -150,7 +150,7 @@ func TestInvalidCgroupName(t *testing.T) { Name: "../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -184,7 +184,7 @@ func TestInvalidAbsoluteCgroupName(t *testing.T) { Name: "/../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -217,7 +217,7 @@ func TestInvalidCgroupNameAndParent(t *testing.T) { Name: "../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } @@ -250,7 +250,7 @@ func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) { Name: "/../../../../../../../../../../some/path", } - data, err := getCgroupData(config, 0) + data, err := getCgroupData(config, 0, "") if err != nil { t.Errorf("couldn't get cgroup data: %v", err) } diff --git a/libcontainer/cgroups/fs/intelrdt.go b/libcontainer/cgroups/fs/intelrdt.go new file mode 100644 index 00000000000..1a09fd9d808 --- /dev/null +++ b/libcontainer/cgroups/fs/intelrdt.go @@ -0,0 +1,395 @@ +// +build linux + +package fs + +import ( + "bufio" + "errors" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +/* + * About Intel RDT/CAT feature: + * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). + * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3 + * Cache is the only resource that is supported in RDT. + * + * This feature provides a way for the software to restrict cache allocation to a + * defined 'subset' of L3 cache which may be overlapping with other 'subsets'. + * The different subsets are identified by class of service (CLOS) and each CLOS + * has a capacity bitmask (CBM). + * + * For more information about Intel RDT/CAT can be found in the section 17.17 + * of Intel Software Developer Manual. + * + * About Intel RDT/CAT kernel interface: + * In Linux kernel, the interface is defined and exposed via "resource control" + * filesystem, which is a "cgroup-like" interface. + * + * Comparing with cgroups, it has similar process management lifecycle and + * interfaces in a container. But unlike cgroups' hierarchy, it has single level + * filesystem layout. + * + * Intel RDT "resource control" filesystem hierarchy: + * mount -t resctrl resctrl /sys/fs/resctrl + * tree /sys/fs/resctrl + * /sys/fs/resctrl/ + * |-- info + * | |-- L3 + * | |-- cbm_mask + * | |-- num_closids + * |-- cpus + * |-- schemata + * |-- tasks + * |-- + * |-- cpus + * |-- schemata + * |-- tasks + * + * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache + * resource constraints. + * + * The file `tasks` has a list of tasks that belongs to this group (e.g., + * " group). Tasks can be added to a group by writing the task ID + * to the "tasks" file (which will automatically remove them from the previous + * group to which they belonged). New tasks created by fork(2) and clone(2) are + * added to the same group as their parent. If a pid is not in any sub group, it is + * in root group. 
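+ * For example, `echo 1234 > /sys/fs/resctrl/container_id/tasks` moves task
+ * 1234 into the "container_id" group.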
+ * + * The file `schemata` has allocation bitmasks/values for L3 cache on each socket, + * which contains L3 cache id and capacity bitmask (CBM). + * Format: "L3:=;=;..." + * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` + * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + * + * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can + * be set is less than the max bit. The max bits in the CBM is varied among + * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem + * layout, the CBM in a group should be a subset of the CBM in root. Kernel will + * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits + * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM + * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + * + * For more information about Intel RDT/CAT kernel interface: + * https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/cache&id=f20e57892806ad244eaec7a7ae365e78fee53377 + * + * An example for runc: + * There are two L3 caches in the two-socket machine, the default CBM is 0xfffff + * and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache + * id 0 and the whole L3 cache id 1 for the container: + * + * "linux": { + * "resources": { + * "intelRdt": { + * "l3CacheSchema": "L3:0=ffff0;1=fffff" + * } + * } + * } + */ + +type IntelRdtGroup struct { +} + +func (s *IntelRdtGroup) Name() string { + return "intel_rdt" +} + +func (s *IntelRdtGroup) Apply(d *cgroupData) error { + data, err := getIntelRdtData(d.config, d.pid, d.containerId) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if _, err := data.join(data.containerId); err != nil { + return err + } + + return nil +} + +func (s *IntelRdtGroup) Set(path string, cgroup *configs.Cgroup) error { + // About L3 cache schemata file: + // The schema has allocation masks/values for L3 cache on each socket, + // which contains L3 cache id and capacity bitmask (CBM). + // Format: "L3:=;=;..." + // For example, on a two-socket machine, L3's schema line could be: + // L3:0=ff;1=c0 + // Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + // + // About L3 cache CBM validity: + // The valid L3 cache CBM is a *contiguous bits set* and number of + // bits that can be set is less than the max bit. The max bits in the + // CBM is varied among supported Intel Xeon platforms. In Intel RDT + // "resource control" filesystem layout, the CBM in a group should + // be a subset of the CBM in root. Kernel will check if it is valid + // when writing. + // e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, + // which mapping to entire L3 cache capacity. Some valid CBM values + // to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. 
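+ // For example, writing "L3:0=ffff0;1=fffff" gives this group 16 of 20
+ // cache ways on L3 cache id 0 and all 20 ways on cache id 1, assuming a
+ // 20-bit root CBM of 0xfffff.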
+ l3CacheSchema := cgroup.Resources.IntelRdtL3CacheSchema + if l3CacheSchema != "" { + if err := writeFile(path, "schemata", l3CacheSchema+"\n"); err != nil { + return err + } + } + return nil +} + +func (s *IntelRdtGroup) Remove(d *cgroupData) error { + path, err := GetIntelRdtPath(d.containerId) + if err != nil { + return err + } + if err := removePath(path, nil); err != nil { + return err + } + return nil +} + +func (s *IntelRdtGroup) GetStats(path string, stats *cgroups.Stats) error { + // The read-only default "schemata" in root + rootPath, err := getIntelRdtRoot() + if err != nil { + return err + } + schemaRoot, err := getCgroupParamString(rootPath, "schemata") + if err != nil { + return err + } + stats.IntelRdtStats.IntelRdtRootStats.L3CacheSchema = schemaRoot + + // The stats in "container_id" group + schema, err := getCgroupParamString(path, "schemata") + if err != nil { + return err + } + stats.IntelRdtStats.IntelRdtGroupStats.L3CacheSchema = schema + + return nil +} + +const ( + IntelRdtTasks = "tasks" +) + +var ( + ErrIntelRdtNotEnabled = errors.New("intelrdt: config provided but Intel RDT not supported") + + // The root path of the Intel RDT "resource control" filesystem + intelRdtRoot string +) + +type intelRdtData struct { + root string + config *configs.Cgroup + pid int + containerId string +} + +// The read-only Intel RDT related system information in root +type IntelRdtInfo struct { + CbmMask uint64 `json:"cbm_mask,omitempty"` + NumClosid uint64 `json:"num_closid,omitempty"` +} + +// Return the mount point path of Intel RDT "resource control" filesysem +func findIntelRdtMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "Intel RDT" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "resctrl" { + // Check that the mount is properly formated. 
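+ // Per proc(5), the fields after the " - " separator are the
+ // filesystem type, the mount source and the super options, so a
+ // well-formed entry has at least 3 post-separator fields.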
+ if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return fields[4], nil + } + } + if err := s.Err(); err != nil { + return "", err + } + + return "", err +} + +// Gets the root path of Intel RDT "resource control" filesystem +func getIntelRdtRoot() (string, error) { + if intelRdtRoot != "" { + return intelRdtRoot, nil + } + + root, err := findIntelRdtMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + intelRdtRoot = root + return intelRdtRoot, nil +} + +func getIntelRdtData(c *configs.Cgroup, pid int, containerId string) (*intelRdtData, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + return &intelRdtData{ + root: rootPath, + config: c, + pid: pid, + containerId: containerId, + }, nil +} + +// WriteIntelRdtTasks writes the specified pid into the "tasks" file +func WriteIntelRdtTasks(dir string, pid int) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", IntelRdtTasks) + } + + // Dont attach any pid if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err) + } + } + return nil +} + +func (raw *intelRdtData) join(name string) (string, error) { + path := filepath.Join(raw.root, name) + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + + if err := WriteIntelRdtTasks(path, raw.pid); err != nil { + return "", err + } + return path, nil +} + +func isIntelRdtMounted() bool { + _, err := getIntelRdtRoot() + if err != nil { + if !cgroups.IsNotFound(err) { + return false + } + + // If not mounted, we try to mount again: + // mount -t resctrl resctrl /sys/fs/resctrl + if err := os.MkdirAll("/sys/fs/resctrl", 0755); err != nil { + return false + } + if err := exec.Command("mount", "-t", "resctrl", "resctrl", "/sys/fs/resctrl").Run(); err != nil { + return false + } + } + + return true +} + +func parseCpuInfoFile(path string) (bool, error) { + f, err := os.Open(path) + if err != nil { + return false, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return false, err + } + + text := s.Text() + flags := strings.Split(text, " ") + + for _, flag := range flags { + if flag == "rdt_a" { + return true, nil + } + } + } + return false, nil +} + +// Check if Intel RDT is enabled +func IsIntelRdtEnabled() bool { + // 1. check if hardware and kernel support Intel RDT feature + // "rdt" flag is set if supported + isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") + if err != nil { + return false + } + + // 2. 
check if Intel RDT "resource control" filesystem is mounted + isMounted := isIntelRdtMounted() + + return isFlagSet && isMounted +} + +// Get Intel RDT "resource control" filesystem path +func GetIntelRdtPath(id string) (string, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return "", err + } + + path := filepath.Join(rootPath, id) + return path, nil +} + +// Get read-only Intel RDT related system information +func GetIntelRdtInfo() (*IntelRdtInfo, error) { + intelRdtInfo := &IntelRdtInfo{} + + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + + path := filepath.Join(rootPath, "info", "l3") + cbmMask, err := getCgroupParamUint(path, "cbm_mask") + if err != nil { + return nil, err + } + numClosid, err := getCgroupParamUint(path, "num_closid") + if err != nil { + return nil, err + } + + intelRdtInfo.CbmMask = cbmMask + intelRdtInfo.NumClosid = numClosid + + return intelRdtInfo, nil +} diff --git a/libcontainer/cgroups/fs/intelrdt_test.go b/libcontainer/cgroups/fs/intelrdt_test.go new file mode 100644 index 00000000000..9f597345ec4 --- /dev/null +++ b/libcontainer/cgroups/fs/intelrdt_test.go @@ -0,0 +1,70 @@ +// +build linux + +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +func TestIntelRdtSetL3CacheSchema(t *testing.T) { + if !IsIntelRdtEnabled() { + return + } + + helper := NewCgroupTestUtil("intel_rdt", t) + defer helper.cleanup() + + const ( + l3CacheSchemaBefore = "L3:0=f;1=f0" + l3CacheSchemeAfter = "L3:0=f0;1=f" + ) + + helper.writeFileContents(map[string]string{ + "schemata": l3CacheSchemaBefore + "\n", + }) + + helper.CgroupData.config.Resources.IntelRdtL3CacheSchema = l3CacheSchemeAfter + intelrdt := &IntelRdtGroup{} + if err := intelrdt.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "schemata") + if err != nil { + t.Fatalf("Failed to parse file 'schemata' - %s", err) + } + + if value != l3CacheSchemeAfter { + t.Fatal("Got the wrong value, set 'schemata' failed.") + } +} + +func TestIntelRdtStats(t *testing.T) { + if !IsIntelRdtEnabled() { + return + } + + helper := NewCgroupTestUtil("intel_rdt", t) + defer helper.cleanup() + + const ( + l3CacheSchemaContent = "L3:0=ffff0;1=fff00" + ) + + helper.writeFileContents(map[string]string{ + "schemata": l3CacheSchemaContent + "\n", + }) + + intelrdt := &IntelRdtGroup{} + stats := *cgroups.NewStats() + if err := intelrdt.GetStats(helper.CgroupPath, &stats); err != nil { + t.Fatal(err) + } + + if stats.IntelRdtStats.IntelRdtGroupStats.L3CacheSchema != l3CacheSchemaContent { + t.Fatalf("Expected '%q', got '%q' for file 'schemata'", + l3CacheSchemaContent, stats.IntelRdtStats.IntelRdtGroupStats.L3CacheSchema) + } +} diff --git a/libcontainer/cgroups/stats.go b/libcontainer/cgroups/stats.go index b483f1bf983..3622f927099 100644 --- a/libcontainer/cgroups/stats.go +++ b/libcontainer/cgroups/stats.go @@ -90,13 +90,27 @@ type HugetlbStats struct { Failcnt uint64 `json:"failcnt"` } +type IntelRdtRootStats struct { + L3CacheSchema string `json:"l3_cache_schema,omitempty"` +} + +type IntelRdtGroupStats struct { + L3CacheSchema string `json:"l3_cache_schema,omitempty"` +} + +type IntelRdtStats struct { + IntelRdtRootStats IntelRdtRootStats `json:"intel_rdt_root_stats,omitempty"` + IntelRdtGroupStats IntelRdtGroupStats `json:"intel_rdt_group_stats,omitempty"` +} + type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` MemoryStats MemoryStats 
`json:"memory_stats,omitempty"` PidsStats PidsStats `json:"pids_stats,omitempty"` BlkioStats BlkioStats `json:"blkio_stats,omitempty"` // the map is in the format "size of hugepage: stats of the hugepage" - HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` + IntelRdtStats IntelRdtStats `json:"intel_rdt_stats,omitempty"` } func NewStats() *Stats { diff --git a/libcontainer/cgroups/systemd/apply_nosystemd.go b/libcontainer/cgroups/systemd/apply_nosystemd.go index 7de9ae6050b..a13cb53e40c 100644 --- a/libcontainer/cgroups/systemd/apply_nosystemd.go +++ b/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -10,8 +10,10 @@ import ( ) type Manager struct { - Cgroups *configs.Cgroup - Paths map[string]string + Cgroups *configs.Cgroup + Paths map[string]string + ContainerId string + ResourcePath string } func UseSystemd() bool { diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index fd428f90cb9..7bb0add93e8 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -21,9 +21,11 @@ import ( ) type Manager struct { - mu sync.Mutex - Cgroups *configs.Cgroup - Paths map[string]string + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string + ContainerId string + ResourcePath string } type subsystem interface { @@ -62,6 +64,7 @@ var subsystems = subsystemSet{ &fs.NetPrioGroup{}, &fs.NetClsGroup{}, &fs.NameGroup{GroupName: "name=systemd"}, + // If Intel RDT is enabled, will append IntelRdtGroup later } const ( @@ -286,21 +289,36 @@ func (m *Manager) Apply(pid int) error { return err } + // If Intel RDT is enabled, append IntelRdtGroup to subsystems + if fs.IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" { + subsystems = append(subsystems, &fs.IntelRdtGroup{}) + + // Intel RDT "resource control" is not real cgroup, it will not join cgroup path + intelRdtPath, err := joinIntelRdt(c, pid, m.ContainerId) + if err != nil { + return err + } + m.ResourcePath = intelRdtPath + } + if err := joinCgroups(c, pid); err != nil { return err } paths := make(map[string]string) for _, s := range subsystems { - subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) - if err != nil { - // Don't fail if a cgroup hierarchy was not found, just skip this subsystem - if cgroups.IsNotFound(err) { - continue + // Intel RDT "resource control" filesystem is not in cgroup path + if s.Name() != "intel_rdt" { + subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err } - return err + paths[s.Name()] = subsystemPath } - paths[s.Name()] = subsystemPath } m.Paths = paths return nil @@ -317,6 +335,12 @@ func (m *Manager) Destroy() error { return err } m.Paths = make(map[string]string) + + // Intel RDT "resource control" filesystem + if m.ResourcePath != "" { + return os.RemoveAll(m.ResourcePath) + } + m.ResourcePath = "" return nil } @@ -327,6 +351,13 @@ func (m *Manager) GetPaths() map[string]string { return paths } +func (m *Manager) GetResourcePath() string { + m.mu.Lock() + path := m.ResourcePath + m.mu.Unlock() + return path +} + func writeFile(dir, file, data string) error { // Normally dir should not be empty, one case is that cgroup subsystem // is not mounted, we will get empty dir, and we want it fail here. 
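Both cgroup managers now expose the container's resctrl group directory through `GetResourcePath()`. A rough sketch of how calling code could consume it (the `ReadL3Schema` helper is hypothetical, not part of the patch):

```go
// Package rdtexample sketches a consumer of the new GetResourcePath() method.
package rdtexample

import (
	"io/ioutil"
	"path/filepath"
	"strings"

	"github.com/opencontainers/runc/libcontainer/cgroups"
)

// ReadL3Schema reads back the effective L3 schema from a container's resctrl
// group, if the cgroup manager created one.
func ReadL3Schema(m cgroups.Manager) (string, error) {
	path := m.GetResourcePath() // e.g. /sys/fs/resctrl/<container_id>
	if path == "" {
		// Intel RDT was not configured (or is not supported) for this container.
		return "", nil
	}
	data, err := ioutil.ReadFile(filepath.Join(path, "schemata"))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(data)), nil
}
```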
@@ -350,6 +381,20 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { return path, nil } +func joinIntelRdt(c *configs.Cgroup, pid int, containerId string) (string, error) { + path, err := fs.GetIntelRdtPath(containerId) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := fs.WriteIntelRdtTasks(path, pid); err != nil { + return "", err + } + return path, nil +} + func joinCgroups(c *configs.Cgroup, pid int) error { for _, sys := range subsystems { name := sys.Name() @@ -357,6 +402,10 @@ func joinCgroups(c *configs.Cgroup, pid int) error { case "name=systemd": // let systemd handle this break + case "intel_rdt": + // Intel RDT "resource control" is not real cgroup, + // it will not join cgroup path + break case "cpuset": path, err := getSubsystemPath(c, name) if err != nil && !cgroups.IsNotFound(err) { @@ -498,6 +547,23 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { } } + // Intel RDT "resource control" filesystem stats + if fs.IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" { + intelRdtPath, err := fs.GetIntelRdtPath(m.ContainerId) + if err != nil || !cgroups.PathExists(intelRdtPath) { + return nil, err + } + sys, err := subsystems.Get("intel_rdt") + if err == errSubsystemDoesNotExist { + // In case IntelRdtGroup is not appended to subsystems + subsystems = append(subsystems, &fs.IntelRdtGroup{}) + } + sys, _ = subsystems.Get("intel_rdt") + if err := sys.GetStats(intelRdtPath, stats); err != nil { + return nil, err + } + } + return stats, nil } @@ -514,6 +580,10 @@ func (m *Manager) Set(container *configs.Config) error { return err } + if sys.Name() == "intel_rdt" { + path = m.GetResourcePath() + } + if err := sys.Set(path, container.Cgroups); err != nil { return err } diff --git a/libcontainer/configs/cgroup_unix.go b/libcontainer/configs/cgroup_unix.go index 14d62898162..77a7ad5c56b 100644 --- a/libcontainer/configs/cgroup_unix.go +++ b/libcontainer/configs/cgroup_unix.go @@ -121,4 +121,8 @@ type Resources struct { // Set class identifier for container's network packets NetClsClassid uint32 `json:"net_cls_classid_u"` + + // Intel RDT: the schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:=;=;..." + IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"` } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 82c6d8e4420..6b7244094c4 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -61,6 +61,9 @@ type State struct { // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore ExternalDescriptors []string `json:"external_descriptors,omitempty"` + + // Intel RDT "resource control" filesystem path + IntelRdtPath string `json:"intel_rdt_path"` } // Container is a libcontainer container object. 
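With the new `State` field above, a container's saved state carries the resctrl group path next to its cgroup paths. A trimmed, hypothetical `state.json` fragment might look like:

```json
{
  "id": "mycontainer",
  "cgroup_paths": {
    "memory": "/sys/fs/cgroup/memory/mycontainer"
  },
  "intel_rdt_path": "/sys/fs/resctrl/mycontainer"
}
```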
@@ -376,6 +379,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), + intelRdtPath: c.cgroupManager.GetResourcePath(), childPipe: childPipe, parentPipe: parentPipe, config: c.newInitConfig(p), @@ -1202,6 +1206,7 @@ func (c *linuxContainer) currentState() (*State, error) { Created: c.created, }, CgroupPaths: c.cgroupManager.GetPaths(), + IntelRdtPath: c.cgroupManager.GetResourcePath(), NamespacePaths: make(map[configs.NamespaceType]string), ExternalDescriptors: externalDescriptors, } diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index b7ce552ef02..f804369cc34 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -8,14 +8,16 @@ import ( "testing" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" ) type mockCgroupManager struct { - pids []int - allPids []int - stats *cgroups.Stats - paths map[string]string + pids []int + allPids []int + stats *cgroups.Stats + paths map[string]string + intelRdtPath string } func (m *mockCgroupManager) GetPids() ([]int, error) { @@ -46,6 +48,10 @@ func (m *mockCgroupManager) GetPaths() map[string]string { return m.paths } +func (m *mockCgroupManager) GetResourcePath() string { + return m.intelRdtPath +} + func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } @@ -132,9 +138,10 @@ func TestGetContainerStats(t *testing.T) { func TestGetContainerState(t *testing.T) { var ( - pid = os.Getpid() - expectedMemoryPath = "/sys/fs/cgroup/memory/myid" - expectedNetworkPath = "/networks/fd" + pid = os.Getpid() + expectedMemoryPath = "/sys/fs/cgroup/memory/myid" + expectedNetworkPath = "/networks/fd" + expectedIntelRdtPath = "sys/fs/resctrl/myid" ) container := &linuxContainer{ id: "myid", @@ -164,6 +171,7 @@ func TestGetContainerState(t *testing.T) { paths: map[string]string{ "memory": expectedMemoryPath, }, + intelRdtPath: expectedIntelRdtPath, }, } container.state = &createdState{c: container} @@ -184,6 +192,12 @@ func TestGetContainerState(t *testing.T) { if memPath := paths["memory"]; memPath != expectedMemoryPath { t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath) } + if fs.IsIntelRdtEnabled() { + intelRdtPath := state.IntelRdtPath + if intelRdtPath != expectedIntelRdtPath { + t.Fatalf("expected intelRdt path %q but received %q", expectedIntelRdtPath, intelRdtPath) + } + } for _, ns := range container.config.Namespaces { path := state.NamespacePaths[ns.Type] if path == "" { diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 6e2bf3ad49b..3d295c5149a 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -43,10 +43,11 @@ func InitArgs(args ...string) func(*LinuxFactory) error { // SystemdCgroups is an options func to configure a LinuxFactory to return // containers that use systemd to create and manage cgroups. 
func SystemdCgroups(l *LinuxFactory) error { - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string, containerId string) cgroups.Manager { return &systemd.Manager{ - Cgroups: config, - Paths: paths, + Cgroups: config, + Paths: paths, + ContainerId: containerId, } } return nil @@ -56,10 +57,11 @@ func SystemdCgroups(l *LinuxFactory) error { // containers that use the native cgroups filesystem implementation to // create and manage cgroups. func Cgroupfs(l *LinuxFactory) error { - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string, containerId string) cgroups.Manager { return &fs.Manager{ - Cgroups: config, - Paths: paths, + Cgroups: config, + Paths: paths, + ContainerId: containerId, } } return nil @@ -128,7 +130,7 @@ type LinuxFactory struct { Validator validate.Validator // NewCgroupsManager returns an initialized cgroups manager for a single container. - NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager + NewCgroupsManager func(config *configs.Cgroup, paths map[string]string, containerId string) cgroups.Manager } func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { @@ -177,7 +179,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err config: config, initArgs: l.InitArgs, criuPath: l.CriuPath, - cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), + cgroupManager: l.NewCgroupsManager(config.Cgroups, nil, id), } c.state = &stoppedState{c: c} return c, nil @@ -204,7 +206,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) { config: &state.Config, initArgs: l.InitArgs, criuPath: l.CriuPath, - cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), + cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths, id), root: containerRoot, created: state.Created, } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 4b54e4b215c..bfde5181280 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -14,6 +14,7 @@ import ( "syscall" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" @@ -47,6 +48,7 @@ type setnsProcess struct { parentPipe *os.File childPipe *os.File cgroupPaths map[string]string + intelRdtPath string config *initConfig fds []string process *Process @@ -87,6 +89,15 @@ func (p *setnsProcess) start() (err error) { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } + if p.intelRdtPath != "" { + // if Intel RDT "resource control" filesystem path exists + _, err := os.Stat(p.intelRdtPath) + if err == nil { + if err := fs.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { + return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid()) + } + } + } // set oom_score_adj if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { return newSystemErrorWithCause(err, "setting oom score") diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index fec19784ffb..7ebc96fc1ed 100644 --- a/libcontainer/specconv/spec_linux.go 
+++ b/libcontainer/specconv/spec_linux.go @@ -464,6 +464,11 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (* }) } } + if r.IntelRdt != nil { + if r.IntelRdt.L3CacheSchema != nil { + c.Resources.IntelRdtL3CacheSchema = *r.IntelRdt.L3CacheSchema + } + } return c, nil }
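Putting the series together: with the `specs-go` types from patch 1 and the `createCgroupConfig` change above, a tool that generates a spec programmatically can request an L3 schema as sketched below. Only the `Resources`/`IntelRdt` types are API from this series; the surrounding program is illustrative:

```go
package main

import (
	"encoding/json"
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
	// 4/5 of L3 cache id 0 and the whole of cache id 1, matching the example
	// used throughout this series.
	schema := "L3:0=ffff0;1=fffff"

	res := specs.Resources{
		IntelRdt: &specs.IntelRdt{L3CacheSchema: &schema},
	}

	// This is the fragment that ends up under "linux"."resources" in
	// config.json; createCgroupConfig above copies it into
	// Resources.IntelRdtL3CacheSchema.
	out, _ := json.MarshalIndent(res.IntelRdt, "", "  ")
	fmt.Println(string(out))
}
```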