Skip to content

Commit 9ec2870

Browse files
author
Ivan Shvedunov
committed
Fix kubelet killing VMs upon Virtlet pod restart
The culprits were cgroups that aren't handled by libvirt. Of those, we already handle hugetlb by moving the emulator process out of it. Still, we need to do the same for the systemd (name=systemd) and pids cgroup controllers. The problem manifested itself when cgroup-per-qos is enabled for kubelet. This is the default, but in the current kdc it may be disabled as a workaround for an old kubelet bug. This bug is already fixed, so the workaround is to be removed soon.
1 parent 34f37d9 commit 9ec2870

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

cmd/vmwrapper/vmwrapper.go

+10-6
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,17 @@ func main() {
7171
}
7272
}
7373

74-
// FIXME: move the pid of qemu instance out of /kubepods/podxxxxxxx
75-
// for some cases it will be killed by kubelet after the virtlet pod is deleted/recreated
74+
// FIXME: move the pid of qemu instance out of kubelet-managed
75+
// for cgroups that aren't managed by libvirt.
76+
// If we don't do this, the VM pod will be killed by kubelet when Virtlet pod
77+
// is removed and cgroup-per-qos is enabled in kubelet settings.
7678
cm := cgroups.NewManager(os.Getpid(), nil)
77-
if _, err := cm.GetProcessController("hugetlb"); err == nil {
78-
err = cm.MoveProcess("hugetlb", "/")
79-
if err != nil {
80-
glog.Warningf("failed to move pid into hugetlb path /: %v", err)
79+
for _, ctl := range []string{"hugetlb", "systemd", "pids"} {
80+
if _, err := cm.GetProcessController(ctl); err == nil {
81+
err = cm.MoveProcess(ctl, "/")
82+
if err != nil {
83+
glog.Warningf("failed to move pid into cgroup %q path /: %v", ctl, err)
84+
}
8185
}
8286
}
8387

pkg/utils/cgroups/controllers.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,15 @@ func (c *RealManager) GetProcessControllers() (map[string]string, error) {
9494
// "6:memory:/user.slice/user-xxx.slice/session-xx.scope"
9595
parts := strings.SplitN(line, ":", 3)
9696

97+
name := parts[1]
98+
if strings.HasPrefix(name, "name=") {
99+
// Handle named cgroup hierarchies like name=systemd
100+
// The corresponding directory tree will be /sys/fs/cgroup/systemd
101+
name = name[5:]
102+
}
103+
97104
// use second part as controller name and third as its path
98-
ctrls[parts[1]] = parts[2]
105+
ctrls[name] = parts[2]
99106

100107
if err == io.EOF {
101108
break

0 commit comments

Comments
 (0)